LLVM 13.0.1
PPCISelLowering.cpp
1 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the PPCISelLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "PPCISelLowering.h"
15 #include "PPC.h"
16 #include "PPCCCState.h"
17 #include "PPCCallingConv.h"
18 #include "PPCFrameLowering.h"
19 #include "PPCInstrInfo.h"
20 #include "PPCMachineFunctionInfo.h"
21 #include "PPCPerfectShuffle.h"
22 #include "PPCRegisterInfo.h"
23 #include "PPCSubtarget.h"
24 #include "PPCTargetMachine.h"
25 #include "llvm/ADT/APFloat.h"
26 #include "llvm/ADT/APInt.h"
27 #include "llvm/ADT/ArrayRef.h"
28 #include "llvm/ADT/DenseMap.h"
29 #include "llvm/ADT/None.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/SmallPtrSet.h"
32 #include "llvm/ADT/SmallSet.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/ADT/Statistic.h"
35 #include "llvm/ADT/StringRef.h"
36 #include "llvm/ADT/StringSwitch.h"
58 #include "llvm/IR/CallingConv.h"
59 #include "llvm/IR/Constant.h"
60 #include "llvm/IR/Constants.h"
61 #include "llvm/IR/DataLayout.h"
62 #include "llvm/IR/DebugLoc.h"
63 #include "llvm/IR/DerivedTypes.h"
64 #include "llvm/IR/Function.h"
65 #include "llvm/IR/GlobalValue.h"
66 #include "llvm/IR/IRBuilder.h"
67 #include "llvm/IR/Instructions.h"
68 #include "llvm/IR/Intrinsics.h"
69 #include "llvm/IR/IntrinsicsPowerPC.h"
70 #include "llvm/IR/Module.h"
71 #include "llvm/IR/Type.h"
72 #include "llvm/IR/Use.h"
73 #include "llvm/IR/Value.h"
74 #include "llvm/MC/MCContext.h"
75 #include "llvm/MC/MCExpr.h"
76 #include "llvm/MC/MCRegisterInfo.h"
77 #include "llvm/MC/MCSectionXCOFF.h"
78 #include "llvm/MC/MCSymbolXCOFF.h"
81 #include "llvm/Support/Casting.h"
82 #include "llvm/Support/CodeGen.h"
84 #include "llvm/Support/Compiler.h"
85 #include "llvm/Support/Debug.h"
87 #include "llvm/Support/Format.h"
88 #include "llvm/Support/KnownBits.h"
94 #include <algorithm>
95 #include <cassert>
96 #include <cstdint>
97 #include <iterator>
98 #include <list>
99 #include <utility>
100 #include <vector>
101 
102 using namespace llvm;
103 
104 #define DEBUG_TYPE "ppc-lowering"
105 
106 static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
107 cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
108 
109 static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
110 cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
111 
112 static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
113 cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
114 
115 static cl::opt<bool> DisableSCO("disable-ppc-sco",
116 cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
117 
118 static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
119 cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
120 
121 static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
122 cl::desc("use absolute jump tables on ppc"), cl::Hidden);
123 
125  "ppc-quadword-atomics",
126  cl::desc("enable quadword lock-free atomic operations"), cl::init(false),
127  cl::Hidden);
128 
129 STATISTIC(NumTailCalls, "Number of tail calls");
130 STATISTIC(NumSiblingCalls, "Number of sibling calls");
131 STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM");
132 STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocations probed");
133 
134 static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
135 
136 static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
137 
138 static const char AIXSSPCanaryWordName[] = "__ssp_canary_word";
139 
140 // FIXME: Remove this once the bug has been fixed!
142 
144  const PPCSubtarget &STI)
145  : TargetLowering(TM), Subtarget(STI) {
146  // Initialize the map that relates the PPC addressing modes to the computed flags
147  // of a load/store instruction. The map is used to determine the optimal
148  // addressing mode when selecting loads and stores.
149  initializeAddrModeMap();
150  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
151  // arguments are at least 4/8 bytes aligned.
152  bool isPPC64 = Subtarget.isPPC64();
153  setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
154 
155  // Set up the register classes.
156  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
157  if (!useSoftFloat()) {
158  if (hasSPE()) {
159  addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
160  // EFPU2 APU only supports f32
161  if (!Subtarget.hasEFPU2())
162  addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
163  } else {
164  addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
165  addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
166  }
167  }
168 
169  // Match BITREVERSE to a customized fast code sequence in the td file.
172 
173  // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.
175 
176  // Custom lower inline assembly to check for special registers.
179 
180  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
181  for (MVT VT : MVT::integer_valuetypes()) {
184  }
185 
186  if (Subtarget.isISA3_0()) {
191  } else {
192  // No extending loads from f16 or HW conversions back and forth.
201  }
202 
204 
205  // PowerPC has pre-inc loads and stores.
216  if (!Subtarget.hasSPE()) {
221  }
222 
223  // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
224  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
225  for (MVT VT : ScalarIntVTs) {
230  }
231 
232  if (Subtarget.useCRBits()) {
234 
235  if (isPPC64 || Subtarget.hasFPCVT()) {
238  isPPC64 ? MVT::i64 : MVT::i32);
241  isPPC64 ? MVT::i64 : MVT::i32);
242 
245  isPPC64 ? MVT::i64 : MVT::i32);
248  isPPC64 ? MVT::i64 : MVT::i32);
249 
252  isPPC64 ? MVT::i64 : MVT::i32);
255  isPPC64 ? MVT::i64 : MVT::i32);
256 
259  isPPC64 ? MVT::i64 : MVT::i32);
262  isPPC64 ? MVT::i64 : MVT::i32);
263  } else {
268  }
269 
270  // PowerPC does not support direct load/store of condition registers.
273 
274  // FIXME: Remove this once the ANDI glue bug is fixed:
275  if (ANDIGlueBug)
277 
278  for (MVT VT : MVT::integer_valuetypes()) {
282  }
283 
284  addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
285  }
286 
287  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
288  // PPC (the libcall is not available).
293 
294  // We do not currently implement these libm ops for PowerPC.
301 
302  // PowerPC has no SREM/UREM instructions unless we are on P9
303  // On P9 we may use a hardware instruction to compute the remainder.
304  // When the result of both the remainder and the division is required it is
305  // more efficient to compute the remainder from the result of the division
306  // rather than use the remainder instruction. The instructions are legalized
307  // directly because the DivRemPairsPass performs the transformation at the IR
308  // level.
309  if (Subtarget.isISA3_0()) {
314  } else {
319  }
320 
321  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
330 
331  // Handle constrained floating-point operations of scalar.
332  // TODO: Handle SPE specific operation.
338 
343 
344  if (!Subtarget.hasSPE()) {
347  }
348 
349  if (Subtarget.hasVSX()) {
352  }
353 
354  if (Subtarget.hasFSQRT()) {
357  }
358 
359  if (Subtarget.hasFPRND()) {
364 
369  }
370 
371  // We don't support sin/cos/sqrt/fmod/pow
382  if (Subtarget.hasSPE()) {
385  } else {
388  }
389 
390  if (Subtarget.hasSPE())
392 
394 
395  // If we're enabling GP optimizations, use hardware square root
396  if (!Subtarget.hasFSQRT() &&
397  !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
398  Subtarget.hasFRE()))
400 
401  if (!Subtarget.hasFSQRT() &&
402  !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
403  Subtarget.hasFRES()))
405 
406  if (Subtarget.hasFCPSGN()) {
409  } else {
412  }
413 
414  if (Subtarget.hasFPRND()) {
419 
424  }
425 
426  // PowerPC does not have BSWAP, but we can use the vector BSWAP instruction xxbrd
427  // to speed up scalar BSWAP64.
428  // CTPOP or CTTZ were introduced in P8/P9 respectively
430  if (Subtarget.hasP9Vector() && Subtarget.isPPC64())
432  else
434  if (Subtarget.isISA3_0()) {
437  } else {
440  }
441 
442  if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
445  } else {
448  }
449 
450  // PowerPC does not have ROTR
453 
454  if (!Subtarget.useCRBits()) {
455  // PowerPC does not have Select
460  }
461 
462  // PowerPC wants to turn select_cc of FP into fsel when possible.
465 
466  // PowerPC wants to optimize integer setcc a bit
467  if (!Subtarget.useCRBits())
469 
470  if (Subtarget.hasFPU()) {
474 
478  }
479 
480  // PowerPC does not have BRCOND which requires SetCC
481  if (!Subtarget.useCRBits())
483 
485 
486  if (Subtarget.hasSPE()) {
487  // SPE has built-in conversions
494 
495  // SPE supports signaling compare of f32/f64.
498  } else {
499  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
502 
503  // PowerPC does not have [U|S]INT_TO_FP
508  }
509 
510  if (Subtarget.hasDirectMove() && isPPC64) {
515  if (TM.Options.UnsafeFPMath) {
524  }
525  } else {
530  }
531 
532  // We cannot sextinreg(i1). Expand to shifts.
534 
535  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
536  // SjLj exception handling but a light-weight setjmp/longjmp replacement to
537  // support continuation, user-level threading, etc. As a result, no
538  // other SjLj exception interfaces are implemented, so please don't build
539  // your own exception handling based on them.
540  // LLVM/Clang supports zero-cost DWARF exception handling.
543 
544  // We want to legalize GlobalAddress and ConstantPool nodes into the
545  // appropriate instructions to materialize the address.
556 
557  // TRAP is legal.
559 
560  // TRAMPOLINE is custom lowered.
563 
564  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
566 
567  if (Subtarget.is64BitELFABI()) {
568  // VAARG always uses double-word chunks, so promote anything smaller.
578  } else if (Subtarget.is32BitELFABI()) {
579  // VAARG is custom lowered with the 32-bit SVR4 ABI.
582  } else
584 
585  // VACOPY is custom lowered with the 32-bit SVR4 ABI.
586  if (Subtarget.is32BitELFABI())
588  else
590 
591  // Use the default implementation.
601 
602  // We want to custom lower some of our intrinsics.
604 
605  // To handle counter-based loop conditions.
607 
612 
613  // Comparisons that require checking two conditions.
614  if (Subtarget.hasSPE()) {
619  }
632 
635 
636  if (Subtarget.has64BitSupport()) {
637  // They also have instructions for converting between i64 and fp.
646  // This is just the low 32 bits of a (signed) fp->i64 conversion.
647  // We cannot do this with Promote because i64 is not a legal type.
650 
651  if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
654  }
655  } else {
656  // PowerPC does not have FP_TO_UINT on 32-bit implementations.
657  if (Subtarget.hasSPE()) {
660  } else {
663  }
664  }
665 
666  // With the instructions enabled under FPCVT, we can do everything.
667  if (Subtarget.hasFPCVT()) {
668  if (Subtarget.has64BitSupport()) {
677  }
678 
687  }
688 
689  if (Subtarget.use64BitRegs()) {
690  // 64-bit PowerPC implementations can support i64 types directly
691  addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
692  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
694  // 64-bit PowerPC wants to expand i128 shifts itself.
698  } else {
699  // 32-bit PowerPC wants to expand i64 shifts itself.
703  }
704 
705  // PowerPC has better expansions for funnel shifts than the generic
706  // TargetLowering::expandFunnelShift.
707  if (Subtarget.has64BitSupport()) {
710  }
713 
714  if (Subtarget.hasVSX()) {
719  }
720 
721  if (Subtarget.hasAltivec()) {
722  for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
727  }
728  // First set operation action for all vector types to expand. Then we
729  // will selectively turn on ones that can be effectively codegen'd.
730  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
731  // add/sub are legal for all supported vector VT's.
734 
735  // For v2i64, these are only valid with P8Vector. This is corrected after
736  // the loop.
737  if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
742  }
743  else {
748  }
749 
750  if (Subtarget.hasVSX()) {
753  }
754 
755  // Vector instructions introduced in P8
756  if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
759  }
760  else {
763  }
764 
765  // Vector instructions introduced in P9
766  if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
768  else
770 
771  // We promote all shuffles to v16i8.
774 
775  // We promote all non-typed operations to v4i32.
791 
792  // No other operations are legal.
830 
831  for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
832  setTruncStoreAction(VT, InnerVT, Expand);
833  setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
834  setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
835  setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
836  }
837  }
839  if (!Subtarget.hasP8Vector()) {
844  }
845 
846  // We can custom expand all VECTOR_SHUFFLEs to VPERM; others we can handle
847  // with merges, splats, etc.
849 
850  // Vector truncates to sub-word integer that fit in an Altivec/VSX register
851  // are cheap, so handle them before they get expanded to scalar.
857 
863  Subtarget.useCRBits() ? Legal : Expand);
877 
878  // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
880  // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
881  if (Subtarget.hasAltivec())
882  for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
884  // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
885  if (Subtarget.hasP8Altivec())
887 
888  addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
889  addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
890  addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
891  addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
892 
895 
896  if (Subtarget.hasVSX()) {
900  }
901 
902  if (Subtarget.hasP8Altivec())
904  else
906 
907  if (Subtarget.isISA3_1()) {
926  }
927 
930 
933 
938 
939  // Altivec does not contain unordered floating-point compare instructions
944 
945  if (Subtarget.hasVSX()) {
948  if (Subtarget.hasP8Vector()) {
951  }
952  if (Subtarget.hasDirectMove() && isPPC64) {
961  }
963 
964  // The nearbyint variants are not allowed to raise the inexact exception
965  // so we can only code-gen them with unsafe math.
966  if (TM.Options.UnsafeFPMath) {
969  }
970 
979 
985 
988 
991 
992  // Share the Altivec comparison restrictions.
997 
1000 
1002 
1003  if (Subtarget.hasP8Vector())
1004  addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
1005 
1006  addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
1007 
1008  addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
1009  addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
1010  addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
1011 
1012  if (Subtarget.hasP8Altivec()) {
1016 
1017  // 128 bit shifts can be accomplished via 3 instructions for SHL and
1018  // SRL, but not for SRA because of the instructions available:
1019  // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
1020  // doing
1024 
1026  }
1027  else {
1031 
1033 
1034  // VSX v2i64 only supports non-arithmetic operations.
1037  }
1038 
1039  if (Subtarget.isISA3_1())
1041  else
1043 
1048 
1050 
1059 
1060  // Custom handling for partial vectors of integers converted to
1061  // floating point. We already have optimal handling for v2i32 through
1062  // the DAG combine, so those aren't necessary.
1079 
1086 
1089 
1090  // Handle constrained floating-point operations on vectors.
1091  // The predicate is `hasVSX` because Altivec instructions do not raise
1092  // floating-point exceptions but VSX vector instructions do.
1106 
1120 
1121  addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1122  addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1123 
1124  for (MVT FPT : MVT::fp_valuetypes())
1126 
1127  // Expand the SELECT to SELECT_CC
1129 
1132 
1133  // No implementation for these ops for PowerPC.
1139  }
1140 
1141  if (Subtarget.hasP8Altivec()) {
1142  addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1143  addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1144  }
1145 
1146  if (Subtarget.hasP9Vector()) {
1149 
1150  // 128 bit shifts can be accomplished via 3 instructions for SHL and
1151  // SRL, but not for SRA because of the instructions available:
1152  // VS{RL} and VS{RL}O.
1156 
1162 
1170 
1177 
1181 
1182  // Handle constrained floating-point operations of fp128
1203  } else if (Subtarget.hasVSX()) {
1206 
1209 
1210  // Set FADD/FSUB as libcalls to keep the legalizer from expanding the
1211  // fp_to_uint and int_to_fp.
1214 
1222 
1223  // Expand the fp_extend if the target type is fp128.
1226 
1227  // Expand the fp_round if the source type is fp128.
1228  for (MVT VT : {MVT::f32, MVT::f64}) {
1231  }
1232 
1237 
1238  // Lower the following f128 select_cc pattern:
1239  // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1241 
1242  // We need to handle f128 SELECT_CC with integer result type.
1245  }
1246 
1247  if (Subtarget.hasP9Altivec()) {
1250 
1258  }
1259 
1260  if (Subtarget.isISA3_1())
1262  }
1263 
1264  if (Subtarget.pairedVectorMemops()) {
1265  addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1268  }
1269  if (Subtarget.hasMMA()) {
1270  addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1274  }
1275 
1276  if (Subtarget.has64BitSupport())
1278 
1279  if (Subtarget.isISA3_1())
1281 
1283 
1284  if (!isPPC64) {
1287  }
1288 
1289  if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics())
1291 
1293 
1294  if (Subtarget.hasAltivec()) {
1295  // Altivec instructions set fields to all zeros or all ones.
1297  }
1298 
1299  if (!isPPC64) {
1300  // These libcalls are not available in 32-bit.
1301  setLibcallName(RTLIB::SHL_I128, nullptr);
1302  setLibcallName(RTLIB::SRL_I128, nullptr);
1303  setLibcallName(RTLIB::SRA_I128, nullptr);
1304  }
1305 
1306  if (!isPPC64)
1308 
1309  setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1310 
1311  // We have target-specific dag combine patterns for the following nodes:
1320  if (Subtarget.hasFPCVT())
1325  if (Subtarget.useCRBits())
1331 
1335 
1338 
1339 
1340  if (Subtarget.useCRBits()) {
1344  }
1345 
1346  if (Subtarget.hasP9Altivec()) {
1349  }
1350 
1351  setLibcallName(RTLIB::LOG_F128, "logf128");
1352  setLibcallName(RTLIB::LOG2_F128, "log2f128");
1353  setLibcallName(RTLIB::LOG10_F128, "log10f128");
1354  setLibcallName(RTLIB::EXP_F128, "expf128");
1355  setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1356  setLibcallName(RTLIB::SIN_F128, "sinf128");
1357  setLibcallName(RTLIB::COS_F128, "cosf128");
1358  setLibcallName(RTLIB::POW_F128, "powf128");
1359  setLibcallName(RTLIB::FMIN_F128, "fminf128");
1360  setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1361  setLibcallName(RTLIB::REM_F128, "fmodf128");
1362  setLibcallName(RTLIB::SQRT_F128, "sqrtf128");
1363  setLibcallName(RTLIB::CEIL_F128, "ceilf128");
1364  setLibcallName(RTLIB::FLOOR_F128, "floorf128");
1365  setLibcallName(RTLIB::TRUNC_F128, "truncf128");
1366  setLibcallName(RTLIB::ROUND_F128, "roundf128");
1367  setLibcallName(RTLIB::LROUND_F128, "lroundf128");
1368  setLibcallName(RTLIB::LLROUND_F128, "llroundf128");
1369  setLibcallName(RTLIB::RINT_F128, "rintf128");
1370  setLibcallName(RTLIB::LRINT_F128, "lrintf128");
1371  setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
1372  setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
1373  setLibcallName(RTLIB::FMA_F128, "fmaf128");
1374 
1375  // With 32 condition bits, we don't need to sink (and duplicate) compares
1376  // aggressively in CodeGenPrep.
1377  if (Subtarget.useCRBits()) {
1380  }
1381 
1383 
1384  switch (Subtarget.getCPUDirective()) {
1385  default: break;
1386  case PPC::DIR_970:
1387  case PPC::DIR_A2:
1388  case PPC::DIR_E500:
1389  case PPC::DIR_E500mc:
1390  case PPC::DIR_E5500:
1391  case PPC::DIR_PWR4:
1392  case PPC::DIR_PWR5:
1393  case PPC::DIR_PWR5X:
1394  case PPC::DIR_PWR6:
1395  case PPC::DIR_PWR6X:
1396  case PPC::DIR_PWR7:
1397  case PPC::DIR_PWR8:
1398  case PPC::DIR_PWR9:
1399  case PPC::DIR_PWR10:
1400  case PPC::DIR_PWR_FUTURE:
1403  break;
1404  }
1405 
1406  if (Subtarget.enableMachineScheduler())
1408  else
1410 
1412 
1413  // The Freescale cores do better with aggressive inlining of memcpy and
1414  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
1415  if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
1416  Subtarget.getCPUDirective() == PPC::DIR_E5500) {
1417  MaxStoresPerMemset = 32;
1419  MaxStoresPerMemcpy = 32;
1421  MaxStoresPerMemmove = 32;
1423  } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
1424  // The A2 also benefits from (very) aggressive inlining of memcpy and
1425  // friends. The overhead of the function call, even when warm, can be
1426  // over one hundred cycles.
1427  MaxStoresPerMemset = 128;
1428  MaxStoresPerMemcpy = 128;
1429  MaxStoresPerMemmove = 128;
1430  MaxLoadsPerMemcmp = 128;
1431  } else {
1432  MaxLoadsPerMemcmp = 8;
1434  }
1435 
1436  IsStrictFPEnabled = true;
1437 
1438  // Let the subtarget (CPU) decide if a predictable select is more expensive
1439  // than the corresponding branch. This information is used in CGP to decide
1440  // when to convert selects into branches.
1442 }
1443 
1444 // *********************************** NOTE ************************************
1445 // For selecting load and store instructions, the addressing modes are defined
1446 // as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
1447 // patterns to match the load and store instructions.
1448 //
1449 // The TD definitions for the addressing modes correspond to their respective
1450 // Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1451 // on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1452 // address mode flags of a particular node. Afterwards, the computed address
1453 // flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1454 // addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1455 // accordingly, based on the preferred addressing mode.
1456 //
1457 // Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1458 // MemOpFlags contains all the possible flags that can be used to compute the
1459 // optimal addressing mode for load and store instructions.
1460 // AddrMode contains all the possible load and store addressing modes available
1461 // on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1462 //
1463 // When adding new load and store instructions, it is possible that new address
1464 // flags may need to be added into MemOpFlags, and a new addressing mode will
1465 // need to be added to AddrMode. An entry for the new addressing mode (consisting
1466 // of the minimal and main distinguishing address flags for the new load/store
1467 // instructions) will need to be added into initializeAddrModeMap() below.
1468 // Finally, when adding new addressing modes, getAddrModeForFlags() will
1469 // need to be updated to account for selecting the optimal addressing mode.
1470 // *****************************************************************************
1471 /// Initialize the map that relates the different addressing modes of the load
1472 /// and store instructions to a set of flags. This ensures the load/store
1473 /// instruction is correctly matched during instruction selection.
1474 void PPCTargetLowering::initializeAddrModeMap() {
1475  AddrModesMap[PPC::AM_DForm] = {
1476  // LWZ, STW
1481  // LBZ, LHZ, STB, STH
1486  // LHA
1491  // LFS, LFD, STFS, STFD
1496  };
1497  AddrModesMap[PPC::AM_DSForm] = {
1498  // LWA
1502  // LD, STD
1506  // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
1510  };
1511  AddrModesMap[PPC::AM_DQForm] = {
1512  // LXV, STXV
1519  };
1520 }
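
// Illustrative note (not in the original source): the addressing-mode names
// above follow the Power ISA instruction formats. As a rough reminder of what
// each form encodes:
//   D-Form  - base register + 16-bit signed displacement, e.g. lwz r3, 8(r4)
//   DS-Form - base register + displacement that must be a multiple of 4, e.g. ld
//   DQ-Form - base register + displacement that must be a multiple of 16, e.g. lxv
//   X-Form  - base register + index register, e.g. lwzx r3, r4, r5
// The flag sets registered here are what getAddrModeForFlags() matches the
// flags computed by computeMOFlags() against.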
1521 
1522 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1523 /// the desired ByVal argument alignment.
1524 static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1525  if (MaxAlign == MaxMaxAlign)
1526  return;
1527  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1528  if (MaxMaxAlign >= 32 &&
1529  VTy->getPrimitiveSizeInBits().getFixedSize() >= 256)
1530  MaxAlign = Align(32);
1531  else if (VTy->getPrimitiveSizeInBits().getFixedSize() >= 128 &&
1532  MaxAlign < 16)
1533  MaxAlign = Align(16);
1534  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1535  Align EltAlign;
1536  getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1537  if (EltAlign > MaxAlign)
1538  MaxAlign = EltAlign;
1539  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1540  for (auto *EltTy : STy->elements()) {
1541  Align EltAlign;
1542  getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1543  if (EltAlign > MaxAlign)
1544  MaxAlign = EltAlign;
1545  if (MaxAlign == MaxMaxAlign)
1546  break;
1547  }
1548  }
1549 }
1550 
1551 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1552 /// function arguments in the caller parameter area.
1554  const DataLayout &DL) const {
1555  // 16-byte and wider vectors are passed on a 16-byte boundary.
1556  // The rest are passed on an 8-byte boundary on PPC64 and a 4-byte boundary on PPC32.
1557  Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1558  if (Subtarget.hasAltivec())
1559  getMaxByValAlign(Ty, Alignment, Align(16));
1560  return Alignment.value();
1561 }
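
// Illustrative example (not part of the original source), assuming an Altivec
// subtarget: a by-value struct such as
//   struct S { float Scalar; __vector float V; };
// contains a 128-bit vector member, so getMaxByValAlign() raises the alignment
// to 16 and getByValTypeAlignment() returns 16. Without Altivec, the default of
// 8 (PPC64) or 4 (PPC32) is returned instead.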
1562 
1564  return Subtarget.useSoftFloat();
1565 }
1566 
1568  return Subtarget.hasSPE();
1569 }
1570 
1572  return VT.isScalarInteger();
1573 }
1574 
1575 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1576  switch ((PPCISD::NodeType)Opcode) {
1577  case PPCISD::FIRST_NUMBER: break;
1578  case PPCISD::FSEL: return "PPCISD::FSEL";
1579  case PPCISD::XSMAXCDP: return "PPCISD::XSMAXCDP";
1580  case PPCISD::XSMINCDP: return "PPCISD::XSMINCDP";
1581  case PPCISD::FCFID: return "PPCISD::FCFID";
1582  case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
1583  case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
1584  case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
1585  case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
1586  case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
1587  case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
1588  case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
1590  return "PPCISD::FP_TO_UINT_IN_VSR";
1592  return "PPCISD::FP_TO_SINT_IN_VSR";
1593  case PPCISD::FRE: return "PPCISD::FRE";
1594  case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
1595  case PPCISD::FTSQRT:
1596  return "PPCISD::FTSQRT";
1597  case PPCISD::FSQRT:
1598  return "PPCISD::FSQRT";
1599  case PPCISD::STFIWX: return "PPCISD::STFIWX";
1600  case PPCISD::VPERM: return "PPCISD::VPERM";
1601  case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
1603  return "PPCISD::XXSPLTI_SP_TO_DP";
1604  case PPCISD::XXSPLTI32DX:
1605  return "PPCISD::XXSPLTI32DX";
1606  case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
1607  case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
1608  case PPCISD::VECSHL: return "PPCISD::VECSHL";
1609  case PPCISD::CMPB: return "PPCISD::CMPB";
1610  case PPCISD::Hi: return "PPCISD::Hi";
1611  case PPCISD::Lo: return "PPCISD::Lo";
1612  case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
1613  case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1614  case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1615  case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
1616  case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
1617  case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
1618  case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
1619  case PPCISD::SRL: return "PPCISD::SRL";
1620  case PPCISD::SRA: return "PPCISD::SRA";
1621  case PPCISD::SHL: return "PPCISD::SHL";
1622  case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
1623  case PPCISD::CALL: return "PPCISD::CALL";
1624  case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
1625  case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
1626  case PPCISD::MTCTR: return "PPCISD::MTCTR";
1627  case PPCISD::BCTRL: return "PPCISD::BCTRL";
1628  case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
1629  case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
1630  case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
1631  case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
1632  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1633  case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
1634  case PPCISD::MFVSR: return "PPCISD::MFVSR";
1635  case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
1636  case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
1637  case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
1638  case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
1640  return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1642  return "PPCISD::ANDI_rec_1_EQ_BIT";
1644  return "PPCISD::ANDI_rec_1_GT_BIT";
1645  case PPCISD::VCMP: return "PPCISD::VCMP";
1646  case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec";
1647  case PPCISD::LBRX: return "PPCISD::LBRX";
1648  case PPCISD::STBRX: return "PPCISD::STBRX";
1649  case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
1650  case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
1651  case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
1652  case PPCISD::STXSIX: return "PPCISD::STXSIX";
1653  case PPCISD::VEXTS: return "PPCISD::VEXTS";
1654  case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
1655  case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
1656  case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
1657  case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
1659  return "PPCISD::ST_VSR_SCAL_INT";
1660  case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
1661  case PPCISD::BDNZ: return "PPCISD::BDNZ";
1662  case PPCISD::BDZ: return "PPCISD::BDZ";
1663  case PPCISD::MFFS: return "PPCISD::MFFS";
1664  case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
1665  case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
1666  case PPCISD::CR6SET: return "PPCISD::CR6SET";
1667  case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
1668  case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
1669  case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
1670  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1671  case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
1672  case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
1673  case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
1674  case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
1675  case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
1676  case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1677  case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX";
1678  case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
1679  case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
1680  case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
1681  case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1682  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1683  case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
1684  case PPCISD::PADDI_DTPREL:
1685  return "PPCISD::PADDI_DTPREL";
1686  case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
1687  case PPCISD::SC: return "PPCISD::SC";
1688  case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
1689  case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
1690  case PPCISD::RFEBB: return "PPCISD::RFEBB";
1691  case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
1692  case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
1693  case PPCISD::VABSD: return "PPCISD::VABSD";
1694  case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
1695  case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
1696  case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
1697  case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
1698  case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
1699  case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
1700  case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
1702  return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1704  return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1705  case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
1706  case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
1707  case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1708  case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
1709  case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
1710  case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
1712  return "PPCISD::STRICT_FADDRTZ";
1713  case PPCISD::STRICT_FCTIDZ:
1714  return "PPCISD::STRICT_FCTIDZ";
1715  case PPCISD::STRICT_FCTIWZ:
1716  return "PPCISD::STRICT_FCTIWZ";
1718  return "PPCISD::STRICT_FCTIDUZ";
1720  return "PPCISD::STRICT_FCTIWUZ";
1721  case PPCISD::STRICT_FCFID:
1722  return "PPCISD::STRICT_FCFID";
1723  case PPCISD::STRICT_FCFIDU:
1724  return "PPCISD::STRICT_FCFIDU";
1725  case PPCISD::STRICT_FCFIDS:
1726  return "PPCISD::STRICT_FCFIDS";
1728  return "PPCISD::STRICT_FCFIDUS";
1729  case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
1730  }
1731  return nullptr;
1732 }
1733 
1735  EVT VT) const {
1736  if (!VT.isVector())
1737  return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1738 
1740 }
1741 
1743  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1744  return true;
1745 }
1746 
1747 //===----------------------------------------------------------------------===//
1748 // Node matching predicates, for use by the tblgen matching code.
1749 //===----------------------------------------------------------------------===//
1750 
1751 /// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1753  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1754  return CFP->getValueAPF().isZero();
1755  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1756  // Maybe this has already been legalized into the constant pool?
1757  if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1758  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1759  return CFP->getValueAPF().isZero();
1760  }
1761  return false;
1762 }
1763 
1764 /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
1765 /// true if Op is undef or if it matches the specified value.
1766 static bool isConstantOrUndef(int Op, int Val) {
1767  return Op < 0 || Op == Val;
1768 }
1769 
1770 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1771 /// VPKUHUM instruction.
1772 /// The ShuffleKind distinguishes between big-endian operations with
1773 /// two different inputs (0), either-endian operations with two identical
1774 /// inputs (1), and little-endian operations with two different inputs (2).
1775 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1777  SelectionDAG &DAG) {
1778  bool IsLE = DAG.getDataLayout().isLittleEndian();
1779  if (ShuffleKind == 0) {
1780  if (IsLE)
1781  return false;
1782  for (unsigned i = 0; i != 16; ++i)
1783  if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1784  return false;
1785  } else if (ShuffleKind == 2) {
1786  if (!IsLE)
1787  return false;
1788  for (unsigned i = 0; i != 16; ++i)
1789  if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1790  return false;
1791  } else if (ShuffleKind == 1) {
1792  unsigned j = IsLE ? 0 : 1;
1793  for (unsigned i = 0; i != 8; ++i)
1794  if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1795  !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1796  return false;
1797  }
1798  return true;
1799 }
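
// Illustrative example (not part of the original source): for a big-endian
// target with two distinct inputs (ShuffleKind == 0), isVPKUHUMShuffleMask
// accepts the mask
//   {1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31}
// i.e. the odd-numbered bytes of the 32-byte concatenation of both inputs,
// which is the byte-truncating pack vpkuhum performs. The VPKUWUM and VPKUDUM
// checks below are the analogous patterns at halfword and word granularity.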
1800 
1801 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1802 /// VPKUWUM instruction.
1803 /// The ShuffleKind distinguishes between big-endian operations with
1804 /// two different inputs (0), either-endian operations with two identical
1805 /// inputs (1), and little-endian operations with two different inputs (2).
1806 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1808  SelectionDAG &DAG) {
1809  bool IsLE = DAG.getDataLayout().isLittleEndian();
1810  if (ShuffleKind == 0) {
1811  if (IsLE)
1812  return false;
1813  for (unsigned i = 0; i != 16; i += 2)
1814  if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1815  !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1816  return false;
1817  } else if (ShuffleKind == 2) {
1818  if (!IsLE)
1819  return false;
1820  for (unsigned i = 0; i != 16; i += 2)
1821  if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1822  !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1823  return false;
1824  } else if (ShuffleKind == 1) {
1825  unsigned j = IsLE ? 0 : 2;
1826  for (unsigned i = 0; i != 8; i += 2)
1827  if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1828  !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1829  !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1830  !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1831  return false;
1832  }
1833  return true;
1834 }
1835 
1836 /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1837 /// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1838 /// current subtarget.
1839 ///
1840 /// The ShuffleKind distinguishes between big-endian operations with
1841 /// two different inputs (0), either-endian operations with two identical
1842 /// inputs (1), and little-endian operations with two different inputs (2).
1843 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1845  SelectionDAG &DAG) {
1846  const PPCSubtarget& Subtarget =
1847  static_cast<const PPCSubtarget&>(DAG.getSubtarget());
1848  if (!Subtarget.hasP8Vector())
1849  return false;
1850 
1851  bool IsLE = DAG.getDataLayout().isLittleEndian();
1852  if (ShuffleKind == 0) {
1853  if (IsLE)
1854  return false;
1855  for (unsigned i = 0; i != 16; i += 4)
1856  if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1857  !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1858  !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1859  !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1860  return false;
1861  } else if (ShuffleKind == 2) {
1862  if (!IsLE)
1863  return false;
1864  for (unsigned i = 0; i != 16; i += 4)
1865  if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1866  !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1867  !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1868  !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1869  return false;
1870  } else if (ShuffleKind == 1) {
1871  unsigned j = IsLE ? 0 : 4;
1872  for (unsigned i = 0; i != 8; i += 4)
1873  if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1874  !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1875  !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
1876  !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
1877  !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1878  !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
1879  !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1880  !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1881  return false;
1882  }
1883  return true;
1884 }
1885 
1886 /// isVMerge - Common function, used to match vmrg* shuffles.
1887 ///
1888 static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1889  unsigned LHSStart, unsigned RHSStart) {
1890  if (N->getValueType(0) != MVT::v16i8)
1891  return false;
1892  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1893  "Unsupported merge size!");
1894 
1895  for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
1896  for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
1897  if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1898  LHSStart+j+i*UnitSize) ||
1899  !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1900  RHSStart+j+i*UnitSize))
1901  return false;
1902  }
1903  return true;
1904 }
1905 
1906 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1907 /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1908 /// The ShuffleKind distinguishes between big-endian merges with two
1909 /// different inputs (0), either-endian merges with two identical inputs (1),
1910 /// and little-endian merges with two different inputs (2). For the latter,
1911 /// the input operands are swapped (see PPCInstrAltivec.td).
1913  unsigned ShuffleKind, SelectionDAG &DAG) {
1914  if (DAG.getDataLayout().isLittleEndian()) {
1915  if (ShuffleKind == 1) // unary
1916  return isVMerge(N, UnitSize, 0, 0);
1917  else if (ShuffleKind == 2) // swapped
1918  return isVMerge(N, UnitSize, 0, 16);
1919  else
1920  return false;
1921  } else {
1922  if (ShuffleKind == 1) // unary
1923  return isVMerge(N, UnitSize, 8, 8);
1924  else if (ShuffleKind == 0) // normal
1925  return isVMerge(N, UnitSize, 8, 24);
1926  else
1927  return false;
1928  }
1929 }
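
// Illustrative example (not part of the original source): on a big-endian
// target, a byte-sized merge-low with two distinct inputs (ShuffleKind == 0)
// calls isVMerge(N, 1, 8, 24), which accepts the mask
//   {8,24,9,25,10,26,11,27,12,28,13,29,14,30,15,31}
// i.e. the low halves of the two inputs interleaved byte by byte, matching
// what vmrglb produces.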
1930 
1931 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1932 /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1933 /// The ShuffleKind distinguishes between big-endian merges with two
1934 /// different inputs (0), either-endian merges with two identical inputs (1),
1935 /// and little-endian merges with two different inputs (2). For the latter,
1936 /// the input operands are swapped (see PPCInstrAltivec.td).
1938  unsigned ShuffleKind, SelectionDAG &DAG) {
1939  if (DAG.getDataLayout().isLittleEndian()) {
1940  if (ShuffleKind == 1) // unary
1941  return isVMerge(N, UnitSize, 8, 8);
1942  else if (ShuffleKind == 2) // swapped
1943  return isVMerge(N, UnitSize, 8, 24);
1944  else
1945  return false;
1946  } else {
1947  if (ShuffleKind == 1) // unary
1948  return isVMerge(N, UnitSize, 0, 0);
1949  else if (ShuffleKind == 0) // normal
1950  return isVMerge(N, UnitSize, 0, 16);
1951  else
1952  return false;
1953  }
1954 }
1955 
1956 /**
1957  * Common function used to match vmrgew and vmrgow shuffles
1958  *
1959  * The indexOffset determines whether to look for even or odd words in
1960  * the shuffle mask. This is based on the endianness of the target
1961  * machine.
1962  * - Little Endian:
1963  * - Use offset of 0 to check for odd elements
1964  * - Use offset of 4 to check for even elements
1965  * - Big Endian:
1966  * - Use offset of 0 to check for even elements
1967  * - Use offset of 4 to check for odd elements
1968  * A detailed description of the vector element ordering for little endian and
1969  * big endian can be found at
1970  * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1971  * Targeting your applications - what little endian and big endian IBM XL C/C++
1972  * compiler differences mean to you
1973  *
1974  * The mask to the shuffle vector instruction specifies the indices of the
1975  * elements from the two input vectors to place in the result. The elements are
1976  * numbered in array-access order, starting with the first vector. These vectors
1977  * are always of type v16i8, thus each vector will contain 16 elements, each
1978  * 8 bits in size. More info on the shuffle vector can be found in the
1979  * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1980  * Language Reference.
1981  *
1982  * The RHSStartValue indicates whether the same input vectors are used (unary)
1983  * or two different input vectors are used, based on the following:
1984  * - If the instruction uses the same vector for both inputs, the range of the
1985  * indices will be 0 to 15. In this case, the RHSStart value passed should
1986  * be 0.
1987  * - If the instruction has two different vectors then the range of the
1988  * indices will be 0 to 31. In this case, the RHSStart value passed should
1989  * be 16 (indices 0-15 specify elements in the first vector while indices 16
1990  * to 31 specify elements in the second vector).
1991  *
1992  * \param[in] N The shuffle vector SD Node to analyze
1993  * \param[in] IndexOffset Specifies whether to look for even or odd elements
1994  * \param[in] RHSStartValue Specifies the starting index for the righthand input
1995  * vector to the shuffle_vector instruction
1996  * \return true iff this shuffle vector represents an even or odd word merge
1997  */
1998 static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
1999  unsigned RHSStartValue) {
2000  if (N->getValueType(0) != MVT::v16i8)
2001  return false;
2002 
2003  for (unsigned i = 0; i < 2; ++i)
2004  for (unsigned j = 0; j < 4; ++j)
2005  if (!isConstantOrUndef(N->getMaskElt(i*4+j),
2006  i*RHSStartValue+j+IndexOffset) ||
2007  !isConstantOrUndef(N->getMaskElt(i*4+j+8),
2008  i*RHSStartValue+j+IndexOffset+8))
2009  return false;
2010  return true;
2011 }
2012 
2013 /**
2014  * Determine if the specified shuffle mask is suitable for the vmrgew or
2015  * vmrgow instructions.
2016  *
2017  * \param[in] N The shuffle vector SD Node to analyze
2018  * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
2019  * \param[in] ShuffleKind Identify the type of merge:
2020  * - 0 = big-endian merge with two different inputs;
2021  * - 1 = either-endian merge with two identical inputs;
2022  * - 2 = little-endian merge with two different inputs (inputs are swapped for
2023  * little-endian merges).
2024  * \param[in] DAG The current SelectionDAG
2025  * \return true iff this shuffle mask is suitable for a vmrgew or vmrgow instruction
2026  */
2028  unsigned ShuffleKind, SelectionDAG &DAG) {
2029  if (DAG.getDataLayout().isLittleEndian()) {
2030  unsigned indexOffset = CheckEven ? 4 : 0;
2031  if (ShuffleKind == 1) // Unary
2032  return isVMerge(N, indexOffset, 0);
2033  else if (ShuffleKind == 2) // swapped
2034  return isVMerge(N, indexOffset, 16);
2035  else
2036  return false;
2037  }
2038  else {
2039  unsigned indexOffset = CheckEven ? 0 : 4;
2040  if (ShuffleKind == 1) // Unary
2041  return isVMerge(N, indexOffset, 0);
2042  else if (ShuffleKind == 0) // Normal
2043  return isVMerge(N, indexOffset, 16);
2044  else
2045  return false;
2046  }
2047  return false;
2048 }
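
// Illustrative example (not part of the original source): on a big-endian
// target with two distinct inputs (ShuffleKind == 0) and CheckEven == true,
// the accepted mask is
//   {0,1,2,3, 16,17,18,19, 8,9,10,11, 24,25,26,27}
// i.e. word 0 of each input followed by word 2 of each input, which is the
// even-word merge performed by vmrgew.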
2049 
2050 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2051 /// amount, otherwise return -1.
2052 /// The ShuffleKind distinguishes between big-endian operations with two
2053 /// different inputs (0), either-endian operations with two identical inputs
2054 /// (1), and little-endian operations with two different inputs (2). For the
2055 /// latter, the input operands are swapped (see PPCInstrAltivec.td).
2056 int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
2057  SelectionDAG &DAG) {
2058  if (N->getValueType(0) != MVT::v16i8)
2059  return -1;
2060 
2061  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2062 
2063  // Find the first non-undef value in the shuffle mask.
2064  unsigned i;
2065  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
2066  /*search*/;
2067 
2068  if (i == 16) return -1; // all undef.
2069 
2070  // Otherwise, check to see if the rest of the elements are consecutively
2071  // numbered from this value.
2072  unsigned ShiftAmt = SVOp->getMaskElt(i);
2073  if (ShiftAmt < i) return -1;
2074 
2075  ShiftAmt -= i;
2076  bool isLE = DAG.getDataLayout().isLittleEndian();
2077 
2078  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
2079  // Check the rest of the elements to see if they are consecutive.
2080  for (++i; i != 16; ++i)
2081  if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2082  return -1;
2083  } else if (ShuffleKind == 1) {
2084  // Check the rest of the elements to see if they are consecutive.
2085  for (++i; i != 16; ++i)
2086  if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
2087  return -1;
2088  } else
2089  return -1;
2090 
2091  if (isLE)
2092  ShiftAmt = 16 - ShiftAmt;
2093 
2094  return ShiftAmt;
2095 }
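
// Illustrative example (not part of the original source): on a big-endian
// target with two distinct inputs (ShuffleKind == 0), the mask
//   {3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18}
// is accepted and the function returns a shift amount of 3, matching
// "vsldoi vD, vA, vB, 3". On little-endian (ShuffleKind == 2) the same mask
// would instead yield 16 - 3 = 13 because the inputs are swapped.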
2096 
2097 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2098 /// specifies a splat of a single element that is suitable for input to
2099 /// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
2101  assert(N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) &&
2102  EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
2103 
2104  // The consecutive indices need to specify an element, not part of two
2105  // different elements. So abandon ship early if this isn't the case.
2106  if (N->getMaskElt(0) % EltSize != 0)
2107  return false;
2108 
2109  // This is a splat operation if each element of the permute is the same, and
2110  // if the value doesn't reference the second vector.
2111  unsigned ElementBase = N->getMaskElt(0);
2112 
2113  // FIXME: Handle UNDEF elements too!
2114  if (ElementBase >= 16)
2115  return false;
2116 
2117  // Check that the indices are consecutive, in the case of a multi-byte element
2118  // splatted with a v16i8 mask.
2119  for (unsigned i = 1; i != EltSize; ++i)
2120  if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2121  return false;
2122 
2123  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2124  if (N->getMaskElt(i) < 0) continue;
2125  for (unsigned j = 0; j != EltSize; ++j)
2126  if (N->getMaskElt(i+j) != N->getMaskElt(j))
2127  return false;
2128  }
2129  return true;
2130 }
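
// Illustrative example (not part of the original source): with EltSize == 4,
// a splat of word element 1 is expressed by the byte mask
//   {4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7}
// ElementBase is 4, the first four indices are consecutive, and every later
// group repeats the first one, so isSplatShuffleMask returns true.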
2131 
2132 /// Check that the mask is shuffling N byte elements. Within each N byte
2133 /// element of the mask, the indices could be either in increasing or
2134 /// decreasing order as long as they are consecutive.
2135 /// \param[in] N the shuffle vector SD Node to analyze
2136 /// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2137 /// Word/DoubleWord/QuadWord).
2138 /// \param[in] StepLen the step between consecutive indices within an N-byte
2139 /// element: 1 if the mask is in increasing order, -1 if it is decreasing.
2140 /// \return true iff the mask is shuffling N byte elements.
2142  int StepLen) {
2143  assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2144  "Unexpected element width.");
2145  assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");
2146 
2147  unsigned NumOfElem = 16 / Width;
2148  unsigned MaskVal[16]; // Width is never greater than 16
2149  for (unsigned i = 0; i < NumOfElem; ++i) {
2150  MaskVal[0] = N->getMaskElt(i * Width);
2151  if ((StepLen == 1) && (MaskVal[0] % Width)) {
2152  return false;
2153  } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2154  return false;
2155  }
2156 
2157  for (unsigned int j = 1; j < Width; ++j) {
2158  MaskVal[j] = N->getMaskElt(i * Width + j);
2159  if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2160  return false;
2161  }
2162  }
2163  }
2164 
2165  return true;
2166 }
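
// Illustrative example (not part of the original source): with Width == 4 and
// StepLen == 1, the mask {0,1,2,3, 20,21,22,23, 8,9,10,11, 28,29,30,31} is
// accepted: each 4-byte group starts on a word boundary and counts upward.
// With StepLen == -1, a byte-reversed-word mask such as
// {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12} is accepted instead.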
2167 
2168 bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2169  unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2170  if (!isNByteElemShuffleMask(N, 4, 1))
2171  return false;
2172 
2173  // Now we look at mask elements 0,4,8,12
2174  unsigned M0 = N->getMaskElt(0) / 4;
2175  unsigned M1 = N->getMaskElt(4) / 4;
2176  unsigned M2 = N->getMaskElt(8) / 4;
2177  unsigned M3 = N->getMaskElt(12) / 4;
2178  unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2179  unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2180 
2181  // Below, let H and L be arbitrary elements of the shuffle mask
2182  // where H is in the range [4,7] and L is in the range [0,3].
2183  // H, 1, 2, 3 or L, 5, 6, 7
2184  if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2185  (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2186  ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2187  InsertAtByte = IsLE ? 12 : 0;
2188  Swap = M0 < 4;
2189  return true;
2190  }
2191  // 0, H, 2, 3 or 4, L, 6, 7
2192  if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2193  (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2194  ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2195  InsertAtByte = IsLE ? 8 : 4;
2196  Swap = M1 < 4;
2197  return true;
2198  }
2199  // 0, 1, H, 3 or 4, 5, L, 7
2200  if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2201  (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2202  ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2203  InsertAtByte = IsLE ? 4 : 8;
2204  Swap = M2 < 4;
2205  return true;
2206  }
2207  // 0, 1, 2, H or 4, 5, 6, L
2208  if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2209  (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2210  ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2211  InsertAtByte = IsLE ? 0 : 12;
2212  Swap = M3 < 4;
2213  return true;
2214  }
2215 
2216  // If both vector operands for the shuffle are the same vector, the mask will
2217  // contain only elements from the first one and the second one will be undef.
2218  if (N->getOperand(1).isUndef()) {
2219  ShiftElts = 0;
2220  Swap = true;
2221  unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2222  if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2223  InsertAtByte = IsLE ? 12 : 0;
2224  return true;
2225  }
2226  if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2227  InsertAtByte = IsLE ? 8 : 4;
2228  return true;
2229  }
2230  if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2231  InsertAtByte = IsLE ? 4 : 8;
2232  return true;
2233  }
2234  if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2235  InsertAtByte = IsLE ? 0 : 12;
2236  return true;
2237  }
2238  }
2239 
2240  return false;
2241 }
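
// Illustrative example (not part of the original source): treating the mask as
// four word indices, the pattern {5,1,2,3} (bytes {20,21,22,23, 4,5,6,7,
// 8,9,10,11, 12,13,14,15}) matches the "H, 1, 2, 3" case above with M0 == 5,
// so on big-endian the function returns ShiftElts == 0, InsertAtByte == 0 and
// Swap == false: the result is the first input with its word 0 replaced by
// word 1 of the second input.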
2242 
2244  bool &Swap, bool IsLE) {
2245  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2246  // Ensure each byte index of the word is consecutive.
2247  if (!isNByteElemShuffleMask(N, 4, 1))
2248  return false;
2249 
2250  // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2251  unsigned M0 = N->getMaskElt(0) / 4;
2252  unsigned M1 = N->getMaskElt(4) / 4;
2253  unsigned M2 = N->getMaskElt(8) / 4;
2254  unsigned M3 = N->getMaskElt(12) / 4;
2255 
2256  // If both vector operands for the shuffle are the same vector, the mask will
2257  // contain only elements from the first one and the second one will be undef.
2258  if (N->getOperand(1).isUndef()) {
2259  assert(M0 < 4 && "Indexing into an undef vector?");
2260  if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2261  return false;
2262 
2263  ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2264  Swap = false;
2265  return true;
2266  }
2267 
2268  // Ensure each word index of the ShuffleVector Mask is consecutive.
2269  if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2270  return false;
2271 
2272  if (IsLE) {
2273  if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2274  // Input vectors don't need to be swapped if the leading element
2275  // of the result is one of the 3 left elements of the second vector
2276  // (or if there is no shift to be done at all).
2277  Swap = false;
2278  ShiftElts = (8 - M0) % 8;
2279  } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2280  // Input vectors need to be swapped if the leading element
2281  // of the result is one of the 3 left elements of the first vector
2282  // (or if we're shifting by 4 - thereby simply swapping the vectors).
2283  Swap = true;
2284  ShiftElts = (4 - M0) % 4;
2285  }
2286 
2287  return true;
2288  } else { // BE
2289  if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2290  // Input vectors don't need to be swapped if the leading element
2291  // of the result is one of the 4 elements of the first vector.
2292  Swap = false;
2293  ShiftElts = M0;
2294  } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2295  // Input vectors need to be swapped if the leading element
2296  // of the result is one of the 4 elements of the right vector.
2297  Swap = true;
2298  ShiftElts = M0 - 4;
2299  }
2300 
2301  return true;
2302  }
2303 }
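
// Illustrative example (not part of the original source): on a big-endian
// target, the word-index pattern {1,2,3,4} (a rotation crossing from the first
// input into the second) starts with M0 == 1, so Swap is false and ShiftElts
// is 1, corresponding to shifting the concatenated inputs left by one word.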
2304 
2306  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2307 
2308  if (!isNByteElemShuffleMask(N, Width, -1))
2309  return false;
2310 
2311  for (int i = 0; i < 16; i += Width)
2312  if (N->getMaskElt(i) != i + Width - 1)
2313  return false;
2314 
2315  return true;
2316 }
2317 
2319  return isXXBRShuffleMaskHelper(N, 2);
2320 }
2321 
2323  return isXXBRShuffleMaskHelper(N, 4);
2324 }
2325 
2327  return isXXBRShuffleMaskHelper(N, 8);
2328 }
2329 
2331  return isXXBRShuffleMaskHelper(N, 16);
2332 }
2333 
2334 /// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2335 /// if the inputs to the instruction should be swapped and set \p DM to the
2336 /// value for the immediate.
2337 /// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2338 /// AND element 0 of the result comes from the first input (LE) or second input
2339 /// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2340 /// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2341 /// mask.
2342 bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2343  bool &Swap, bool IsLE) {
2344  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2345 
2346  // Ensure each byte index of the double word is consecutive.
2347  if (!isNByteElemShuffleMask(N, 8, 1))
2348  return false;
2349 
2350  unsigned M0 = N->getMaskElt(0) / 8;
2351  unsigned M1 = N->getMaskElt(8) / 8;
2352  assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2353 
2354  // If both vector operands for the shuffle are the same vector, the mask will
2355  // contain only elements from the first one and the second one will be undef.
2356  if (N->getOperand(1).isUndef()) {
2357  if ((M0 | M1) < 2) {
2358  DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2359  Swap = false;
2360  return true;
2361  } else
2362  return false;
2363  }
2364 
2365  if (IsLE) {
2366  if (M0 > 1 && M1 < 2) {
2367  Swap = false;
2368  } else if (M0 < 2 && M1 > 1) {
2369  M0 = (M0 + 2) % 4;
2370  M1 = (M1 + 2) % 4;
2371  Swap = true;
2372  } else
2373  return false;
2374 
2375  // Note: if control flow reaches this point, Swap has already been set above.
2376  DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2377  return true;
2378  } else { // BE
2379  if (M0 < 2 && M1 > 1) {
2380  Swap = false;
2381  } else if (M0 > 1 && M1 < 2) {
2382  M0 = (M0 + 2) % 4;
2383  M1 = (M1 + 2) % 4;
2384  Swap = true;
2385  } else
2386  return false;
2387 
2388  // Note: if control flow reaches this point, Swap has already been set above.
2389  DM = (M0 << 1) + (M1 & 1);
2390  return true;
2391  }
2392 }
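// Illustrative trace (editorial, not from the original source): on a
// little-endian target with doubleword mask elements M0 = 2 and M1 = 1, the
// first branch applies (M0 > 1 and M1 < 2), so Swap stays false and
// DM = (((~1) & 1) << 1) + ((~2) & 1) = (0 << 1) + 1 = 1.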
2393 
2394 
2395 /// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2396 /// appropriate for PPC mnemonics (which have a big endian bias - namely
2397 /// elements are counted from the left of the vector register).
2398 unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2399  SelectionDAG &DAG) {
2400  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2401  assert(isSplatShuffleMask(SVOp, EltSize));
2402  if (DAG.getDataLayout().isLittleEndian())
2403  return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2404  else
2405  return SVOp->getMaskElt(0) / EltSize;
2406 }
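// Illustrative trace (editorial, not from the original source): for a v4i32
// splat (EltSize = 4) whose mask repeats element 2, getMaskElt(0) is 8, so
// 8 / 4 = 2; on little-endian the returned index is (16/4) - 1 - 2 = 1, while
// on big-endian it is simply 2, matching the left-to-right element numbering
// used by the vspltw/xxspltw mnemonics.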
2407 
2408 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2409 /// by using a vspltis[bhw] instruction of the specified element size, return
2410 /// the constant being splatted. The ByteSize field indicates the number of
2411 /// bytes of each element [124] -> [bhw].
2412 SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2413  SDValue OpVal(nullptr, 0);
2414 
2415  // If ByteSize of the splat is bigger than the element size of the
2416  // build_vector, then we have a case where we are checking for a splat where
2417  // multiple elements of the buildvector are folded together into a single
2418  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2419  unsigned EltSize = 16/N->getNumOperands();
2420  if (EltSize < ByteSize) {
2421  unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2422  SDValue UniquedVals[4];
2423  assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2424 
2425  // See if all of the elements in the buildvector agree across.
2426  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2427  if (N->getOperand(i).isUndef()) continue;
2428  // If the element isn't a constant, bail fully out.
2429  if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2430 
2431  if (!UniquedVals[i&(Multiple-1)].getNode())
2432  UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2433  else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2434  return SDValue(); // no match.
2435  }
2436 
2437  // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2438  // either constant or undef values that are identical for each chunk. See
2439  // if these chunks can form into a larger vspltis*.
2440 
2441  // Check to see if all of the leading entries are either 0 or -1. If
2442  // neither, then this won't fit into the immediate field.
2443  bool LeadingZero = true;
2444  bool LeadingOnes = true;
2445  for (unsigned i = 0; i != Multiple-1; ++i) {
2446  if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2447 
2448  LeadingZero &= isNullConstant(UniquedVals[i]);
2449  LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2450  }
2451  // Finally, check the least significant entry.
2452  if (LeadingZero) {
2453  if (!UniquedVals[Multiple-1].getNode())
2454  return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2455  int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
2456  if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2457  return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2458  }
2459  if (LeadingOnes) {
2460  if (!UniquedVals[Multiple-1].getNode())
2461  return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2462  int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2463  if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2464  return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2465  }
2466 
2467  return SDValue();
2468  }
2469 
2470  // Check to see if this buildvec has a single non-undef value in its elements.
2471  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2472  if (N->getOperand(i).isUndef()) continue;
2473  if (!OpVal.getNode())
2474  OpVal = N->getOperand(i);
2475  else if (OpVal != N->getOperand(i))
2476  return SDValue();
2477  }
2478 
2479  if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2480 
2481  unsigned ValSizeInBytes = EltSize;
2482  uint64_t Value = 0;
2483  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2484  Value = CN->getZExtValue();
2485  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2486  assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2487  Value = FloatToBits(CN->getValueAPF().convertToFloat());
2488  }
2489 
2490  // If the splat value is larger than the element value, then we can never do
2491  // this splat. The only case where the replicated bits would fit into our
2492  // immediate field is zero, and we prefer to use vxor for that.
2493  if (ValSizeInBytes < ByteSize) return SDValue();
2494 
2495  // If the element value is larger than the splat value, check if it consists
2496  // of a repeated bit pattern of size ByteSize.
2497  if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2498  return SDValue();
2499 
2500  // Properly sign extend the value.
2501  int MaskVal = SignExtend32(Value, ByteSize * 8);
2502 
2503  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2504  if (MaskVal == 0) return SDValue();
2505 
2506  // Finally, if this value fits in a 5 bit sext field, return it
2507  if (SignExtend32<5>(MaskVal) == MaskVal)
2508  return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2509  return SDValue();
2510 }
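// Illustrative trace (editorial, not from the original source): asking for
// ByteSize = 2 (vspltish) on a v16i8 build_vector of <0,5,0,5,...> gives
// EltSize = 1 and Multiple = 2; the even elements unify to 0 and the odd ones
// to 5, the leading entry is zero, and 5 < 16, so the routine returns the
// constant 5 and the whole vector can be materialized as "vspltish 5".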
2511 
2512 //===----------------------------------------------------------------------===//
2513 // Addressing Mode Selection
2514 //===----------------------------------------------------------------------===//
2515 
2516 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2517 /// or 64-bit immediate, and if the value can be accurately represented as a
2518 /// sign extension from a 16-bit value. If so, this returns true and the
2519 /// immediate.
2520 bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2521  if (!isa<ConstantSDNode>(N))
2522  return false;
2523 
2524  Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
2525  if (N->getValueType(0) == MVT::i32)
2526  return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
2527  else
2528  return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2529 }
2530 bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2531  return isIntS16Immediate(Op.getNode(), Imm);
2532 }
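// Illustrative examples (editorial, not from the original source): an i32
// constant 0xFFFF8000 truncates to Imm = -32768 and sign-extends back to the
// same 32-bit value, so it is accepted; the constant 40000 truncates to
// -25536, fails the round-trip comparison, and is rejected.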
2533 
2534 /// Used when computing address flags for selecting loads and stores.
2535 /// If we have an OR, check if the LHS and RHS are provably disjoint.
2536 /// An OR of two provably disjoint values is equivalent to an ADD.
2537 /// Most PPC load/store instructions compute the effective address as a sum,
2538 /// so doing this conversion is useful.
2539 static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2540  if (N.getOpcode() != ISD::OR)
2541  return false;
2542  KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2543  if (!LHSKnown.Zero.getBoolValue())
2544  return false;
2545  KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2546  return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2547 }
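// Illustrative example (editorial, not from the original source): for
// N = (or (shl X, 4), 7), computeKnownBits proves the low four bits of the
// shifted operand are zero and all but the low three bits of the constant are
// zero; every bit is known zero on at least one side, so the OR behaves like
// an ADD and can feed the usual base-plus-offset address forms.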
2548 
2549 /// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2550 /// be represented as an indexed [r+r] operation.
2551 bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2552  SDValue &Index,
2553  SelectionDAG &DAG) const {
2554  for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
2555  UI != E; ++UI) {
2556  if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {
2557  if (Memop->getMemoryVT() == MVT::f64) {
2558  Base = N.getOperand(0);
2559  Index = N.getOperand(1);
2560  return true;
2561  }
2562  }
2563  }
2564  return false;
2565 }
2566 
2567 /// isIntS34Immediate - This method tests whether the given node's value can be
2568 /// accurately represented as a sign extension from a 34-bit value. If so,
2569 /// this returns true and the immediate.
2570 bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2571  if (!isa<ConstantSDNode>(N))
2572  return false;
2573 
2574  Imm = (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2575  return isInt<34>(Imm);
2576 }
2577 bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
2578  return isIntS34Immediate(Op.getNode(), Imm);
2579 }
2580 
2581 /// SelectAddressRegReg - Given the specified address, check to see if it
2582 /// can be represented as an indexed [r+r] operation. Returns false if it
2583 /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2584 /// non-zero and N can be represented by a base register plus a signed 16-bit
2585 /// displacement, make a more precise judgement by checking (displacement % \p
2586 /// EncodingAlignment).
2587 bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
2588  SDValue &Index, SelectionDAG &DAG,
2589  MaybeAlign EncodingAlignment) const {
2590  // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2591  // a [pc+imm].
2592  if (SelectAddressPCRel(N, Base))
2593  return false;
2594 
2595  int16_t Imm = 0;
2596  if (N.getOpcode() == ISD::ADD) {
2597  // Is there an SPE f64 load/store that can't handle a 16-bit offset?
2598  // SPE load/store instructions can only handle 8-bit offsets.
2599  if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2600  return true;
2601  if (isIntS16Immediate(N.getOperand(1), Imm) &&
2602  (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2603  return false; // r+i
2604  if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2605  return false; // r+i
2606 
2607  Base = N.getOperand(0);
2608  Index = N.getOperand(1);
2609  return true;
2610  } else if (N.getOpcode() == ISD::OR) {
2611  if (isIntS16Immediate(N.getOperand(1), Imm) &&
2612  (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2613  return false; // Prefer r+i; fold the immediate if we can.
2614 
2615  // If this is an or of disjoint bitfields, we can codegen this as an add
2616  // (for better address arithmetic) if the LHS and RHS of the OR are provably
2617  // disjoint.
2618  KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2619 
2620  if (LHSKnown.Zero.getBoolValue()) {
2621  KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2622  // If all of the bits are known zero on the LHS or RHS, the add won't
2623  // carry.
2624  if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2625  Base = N.getOperand(0);
2626  Index = N.getOperand(1);
2627  return true;
2628  }
2629  }
2630  }
2631 
2632  return false;
2633 }
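// Illustrative examples (editorial, not from the original source): for
// (add X, 18) with EncodingAlignment = 4 (a DS-form user), 18 is a signed
// 16-bit value but not a multiple of 4, so the r+i check fails and the node
// is selected as [r+r] with Base = X and Index = the constant operand; for
// (add X, 16) the aligned immediate is accepted and the function returns
// false so the caller can use the tighter [r+imm] form instead.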
2634 
2635 // If we happen to be doing an i64 load or store into a stack slot that has
2636 // less than a 4-byte alignment, then the frame-index elimination may need to
2637 // use an indexed load or store instruction (because the offset may not be a
2638 // multiple of 4). The extra register needed to hold the offset comes from the
2639 // register scavenger, and it is possible that the scavenger will need to use
2640 // an emergency spill slot. As a result, we need to make sure that a spill slot
2641 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2642 // stack slot.
2643 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2644  // FIXME: This does not handle the LWA case.
2645  if (VT != MVT::i64)
2646  return;
2647 
2648  // NOTE: We'll exclude negative FIs here, which come from argument
2649  // lowering, because there are no known test cases triggering this problem
2650  // using packed structures (or similar). We can remove this exclusion if
2651  // we find such a test case. The reason why this is so test-case driven is
2652  // because this entire 'fixup' is only to prevent crashes (from the
2653  // register scavenger) on not-really-valid inputs. For example, if we have:
2654  // %a = alloca i1
2655  // %b = bitcast i1* %a to i64*
2656  // store i64 0, i64* %b
2657  // then the store should really be marked as 'align 1', but is not. If it
2658  // were marked as 'align 1' then the indexed form would have been
2659  // instruction-selected initially, and the problem this 'fixup' is preventing
2660  // won't happen regardless.
2661  if (FrameIdx < 0)
2662  return;
2663 
2664  MachineFunction &MF = DAG.getMachineFunction();
2665  MachineFrameInfo &MFI = MF.getFrameInfo();
2666 
2667  if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2668  return;
2669 
2670  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2671  FuncInfo->setHasNonRISpills();
2672 }
2673 
2674 /// Returns true if the address N can be represented by a base register plus
2675 /// a signed 16-bit displacement [r+imm], and if it is not better
2676 /// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2677 /// displacements that are multiples of that value.
2678 bool PPCTargetLowering::SelectAddressRegImm(
2679  SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2680  MaybeAlign EncodingAlignment) const {
2681  // FIXME dl should come from parent load or store, not from address
2682  SDLoc dl(N);
2683 
2684  // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2685  // a [pc+imm].
2686  if (SelectAddressPCRel(N, Base))
2687  return false;
2688 
2689  // If this can be more profitably realized as r+r, fail.
2690  if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2691  return false;
2692 
2693  if (N.getOpcode() == ISD::ADD) {
2694  int16_t imm = 0;
2695  if (isIntS16Immediate(N.getOperand(1), imm) &&
2696  (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2697  Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2698  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2699  Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2700  fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2701  } else {
2702  Base = N.getOperand(0);
2703  }
2704  return true; // [r+i]
2705  } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2706  // Match LOAD (ADD (X, Lo(G))).
2707  assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
2708  && "Cannot handle constant offsets yet!");
2709  Disp = N.getOperand(1).getOperand(0); // The global address.
2710  assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2711  Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2712  Disp.getOpcode() == ISD::TargetConstantPool ||
2713  Disp.getOpcode() == ISD::TargetJumpTable);
2714  Base = N.getOperand(0);
2715  return true; // [&g+r]
2716  }
2717  } else if (N.getOpcode() == ISD::OR) {
2718  int16_t imm = 0;
2719  if (isIntS16Immediate(N.getOperand(1), imm) &&
2720  (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2721  // If this is an or of disjoint bitfields, we can codegen this as an add
2722  // (for better address arithmetic) if the LHS and RHS of the OR are
2723  // provably disjoint.
2724  KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2725 
2726  if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2727  // If all of the bits are known zero on the LHS or RHS, the add won't
2728  // carry.
2729  if (FrameIndexSDNode *FI =
2730  dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2731  Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2732  fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2733  } else {
2734  Base = N.getOperand(0);
2735  }
2736  Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2737  return true;
2738  }
2739  }
2740  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2741  // Loading from a constant address.
2742 
2743  // If this address fits entirely in a 16-bit sext immediate field, codegen
2744  // this as "d, 0"
2745  int16_t Imm;
2746  if (isIntS16Immediate(CN, Imm) &&
2747  (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2748  Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2749  Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2750  CN->getValueType(0));
2751  return true;
2752  }
2753 
2754  // Handle 32-bit sext immediates with LIS + addr mode.
2755  if ((CN->getValueType(0) == MVT::i32 ||
2756  (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2757  (!EncodingAlignment ||
2758  isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2759  int Addr = (int)CN->getZExtValue();
2760 
2761  // Otherwise, break this down into an LIS + disp.
2762  Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2763 
2764  Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2765  MVT::i32);
2766  unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2767  Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2768  return true;
2769  }
2770  }
2771 
2772  Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2773  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2774  Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2775  fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2776  } else
2777  Base = N;
2778  return true; // [r+0]
2779 }
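// Illustrative trace (editorial, not from the original source): for a constant
// address 0x12348000, the low 16 bits sign-extend to -32768, so Disp = -32768
// and the high part is (0x12348000 - (-32768)) >> 16 = 0x1235; LIS
// materializes 0x12350000 and adding the displacement recovers 0x12348000
// exactly.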
2780 
2781 /// Similar to the 16-bit case but for instructions that take a 34-bit
2782 /// displacement field (prefixed loads/stores).
2783 bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2784  SDValue &Base,
2785  SelectionDAG &DAG) const {
2786  // Only on 64-bit targets.
2787  if (N.getValueType() != MVT::i64)
2788  return false;
2789 
2790  SDLoc dl(N);
2791  int64_t Imm = 0;
2792 
2793  if (N.getOpcode() == ISD::ADD) {
2794  if (!isIntS34Immediate(N.getOperand(1), Imm))
2795  return false;
2796  Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2797  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2798  Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2799  else
2800  Base = N.getOperand(0);
2801  return true;
2802  }
2803 
2804  if (N.getOpcode() == ISD::OR) {
2805  if (!isIntS34Immediate(N.getOperand(1), Imm))
2806  return false;
2807  // If this is an or of disjoint bitfields, we can codegen this as an add
2808  // (for better address arithmetic) if the LHS and RHS of the OR are
2809  // provably disjoint.
2810  KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2811  if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2812  return false;
2813  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2814  Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2815  else
2816  Base = N.getOperand(0);
2817  Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2818  return true;
2819  }
2820 
2821  if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2822  Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2823  Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2824  return true;
2825  }
2826 
2827  return false;
2828 }
2829 
2830 /// SelectAddressRegRegOnly - Given the specified address, force it to be
2831 /// represented as an indexed [r+r] operation.
2832 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2833  SDValue &Index,
2834  SelectionDAG &DAG) const {
2835  // Check to see if we can easily represent this as an [r+r] address. This
2836  // will fail if it thinks that the address is more profitably represented as
2837  // reg+imm, e.g. where imm = 0.
2838  if (SelectAddressRegReg(N, Base, Index, DAG))
2839  return true;
2840 
2841  // If the address is the result of an add, we will utilize the fact that the
2842  // address calculation includes an implicit add. However, we can reduce
2843  // register pressure if we do not materialize a constant just for use as the
2844  // index register. We only get rid of the add if it is not an add of a
2845  // value and a 16-bit signed constant and both have a single use.
2846  int16_t imm = 0;
2847  if (N.getOpcode() == ISD::ADD &&
2848  (!isIntS16Immediate(N.getOperand(1), imm) ||
2849  !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2850  Base = N.getOperand(0);
2851  Index = N.getOperand(1);
2852  return true;
2853  }
2854 
2855  // Otherwise, do it the hard way, using R0 as the base register.
2856  Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2857  N.getValueType());
2858  Index = N;
2859  return true;
2860 }
2861 
2862 template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2863  Ty *PCRelCand = dyn_cast<Ty>(N);
2864  return PCRelCand && (PCRelCand->getTargetFlags() & PPCII::MO_PCREL_FLAG);
2865 }
2866 
2867 /// Returns true if this address is a PC Relative address.
2868 /// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2869 /// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2870 bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
2871  // This is a materialize PC Relative node. Always select this as PC Relative.
2872  Base = N;
2873  if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2874  return true;
2875  if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
2876  isValidPCRelNode<GlobalAddressSDNode>(N) ||
2877  isValidPCRelNode<JumpTableSDNode>(N) ||
2878  isValidPCRelNode<BlockAddressSDNode>(N))
2879  return true;
2880  return false;
2881 }
2882 
2883 /// Returns true if we should use a direct load into vector instruction
2884 /// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
2885 static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST) {
2886 
2887  // If there are any other uses other than scalar to vector, then we should
2888  // keep it as a scalar load -> direct move pattern to prevent multiple
2889  // loads.
2890  LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
2891  if (!LD)
2892  return false;
2893 
2894  EVT MemVT = LD->getMemoryVT();
2895  if (!MemVT.isSimple())
2896  return false;
2897  switch(MemVT.getSimpleVT().SimpleTy) {
2898  case MVT::i64:
2899  break;
2900  case MVT::i32:
2901  if (!ST.hasP8Vector())
2902  return false;
2903  break;
2904  case MVT::i16:
2905  case MVT::i8:
2906  if (!ST.hasP9Vector())
2907  return false;
2908  break;
2909  default:
2910  return false;
2911  }
2912 
2913  SDValue LoadedVal(N, 0);
2914  if (!LoadedVal.hasOneUse())
2915  return false;
2916 
2917  for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
2918  UI != UE; ++UI)
2919  if (UI.getUse().get().getResNo() == 0 &&
2920  UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
2921  UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
2922  return false;
2923 
2924  return true;
2925 }
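// Illustrative example (editorial, not from the original source): an i64 load
// whose only user is a scalar_to_vector qualifies on any subtarget, an i32
// load needs P8 vector support, and i8/i16 loads need P9; a qualifying load
// is later kept as a direct load into a vector register (lxsd/lfd style)
// instead of a GPR load followed by a direct move.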
2926 
2927 /// getPreIndexedAddressParts - returns true by value, base pointer and
2928 /// offset pointer and addressing mode by reference if the node's address
2929 /// can be legally represented as pre-indexed load / store address.
2930 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
2931  SDValue &Offset,
2932  ISD::MemIndexedMode &AM,
2933  SelectionDAG &DAG) const {
2934  if (DisablePPCPreinc) return false;
2935 
2936  bool isLoad = true;
2937  SDValue Ptr;
2938  EVT VT;
2939  unsigned Alignment;
2940  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2941  Ptr = LD->getBasePtr();
2942  VT = LD->getMemoryVT();
2943  Alignment = LD->getAlignment();
2944  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2945  Ptr = ST->getBasePtr();
2946  VT = ST->getMemoryVT();
2947  Alignment = ST->getAlignment();
2948  isLoad = false;
2949  } else
2950  return false;
2951 
2952  // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
2953  // instructions because we can fold these into a more efficient instruction
2954  // instead (such as LXSD).
2955  if (isLoad && usePartialVectorLoads(N, Subtarget)) {
2956  return false;
2957  }
2958 
2959  // PowerPC doesn't have preinc load/store instructions for vectors
2960  if (VT.isVector())
2961  return false;
2962 
2963  if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2964  // Common code will reject creating a pre-inc form if the base pointer
2965  // is a frame index, or if N is a store and the base pointer is either
2966  // the same as or a predecessor of the value being stored. Check for
2967  // those situations here, and try with swapped Base/Offset instead.
2968  bool Swap = false;
2969 
2970  if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
2971  Swap = true;
2972  else if (!isLoad) {
2973  SDValue Val = cast<StoreSDNode>(N)->getValue();
2974  if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2975  Swap = true;
2976  }
2977 
2978  if (Swap)
2979  std::swap(Base, Offset);
2980 
2981  AM = ISD::PRE_INC;
2982  return true;
2983  }
2984 
2985  // LDU/STU can only handle immediates that are a multiple of 4.
2986  if (VT != MVT::i64) {
2987  if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, None))
2988  return false;
2989  } else {
2990  // LDU/STU need an address with at least 4-byte alignment.
2991  if (Alignment < 4)
2992  return false;
2993 
2994  if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
2995  return false;
2996  }
2997 
2998  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2999  // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
3000  // sext i32 to i64 when addr mode is r+i.
3001  if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
3002  LD->getExtensionType() == ISD::SEXTLOAD &&
3003  isa<ConstantSDNode>(Offset))
3004  return false;
3005  }
3006 
3007  AM = ISD::PRE_INC;
3008  return true;
3009 }
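// Illustrative examples (editorial, not from the original source): an i32 load
// from (add R, 20) matches the [r+imm] path, so Base = R, Offset = 20 and
// AM = ISD::PRE_INC, which later becomes an update-form access such as lwzu;
// an i64 access with only 2-byte alignment is rejected, since LDU/STU need a
// 4-byte-aligned address and a displacement that is a multiple of 4.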
3010 
3011 //===----------------------------------------------------------------------===//
3012 // LowerOperation implementation
3013 //===----------------------------------------------------------------------===//
3014 
3015 /// Return true if we should reference labels using a PICBase, set the HiOpFlags
3016 /// and LoOpFlags to the target MO flags.
3017 static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
3018  unsigned &HiOpFlags, unsigned &LoOpFlags,
3019  const GlobalValue *GV = nullptr) {
3020  HiOpFlags = PPCII::MO_HA;
3021  LoOpFlags = PPCII::MO_LO;
3022 
3023  // Don't use the pic base if not in PIC relocation model.
3024  if (IsPIC) {
3025  HiOpFlags |= PPCII::MO_PIC_FLAG;
3026  LoOpFlags |= PPCII::MO_PIC_FLAG;
3027  }
3028 }
3029 
3030 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3031  SelectionDAG &DAG) {
3032  SDLoc DL(HiPart);
3033  EVT PtrVT = HiPart.getValueType();
3034  SDValue Zero = DAG.getConstant(0, DL, PtrVT);
3035 
3036  SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
3037  SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
3038 
3039  // With PIC, the first instruction is actually "GR+hi(&G)".
3040  if (isPIC)
3041  Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
3042  DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
3043 
3044  // Generate non-pic code that has direct accesses to the constant pool.
3045  // The address of the global is just (hi(&g)+lo(&g)).
3046  return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
3047 }
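// Illustrative arithmetic (editorial, not from the original source): the
// MO_HA/MO_LO pair follows the "high adjusted" convention, so for a symbol at
// 0x1000FFF0 the low half is 0xFFF0 (sign-extended to -16) and the high half
// must be 0x1001, since (0x1001 << 16) + (-16) == 0x1000FFF0; the Hi and Lo
// nodes built above add those two halves back together.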
3048 
3049 static void setUsesTOCBasePtr(MachineFunction &MF) {
3050  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3051  FuncInfo->setUsesTOCBasePtr();
3052 }
3053 
3054 static void setUsesTOCBasePtr(SelectionDAG &DAG) {
3055  setUsesTOCBasePtr(DAG.getMachineFunction());
3056 }
3057 
3058 SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3059  SDValue GA) const {
3060  const bool Is64Bit = Subtarget.isPPC64();
3061  EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
3062  SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
3063  : Subtarget.isAIXABI()
3064  ? DAG.getRegister(PPC::R2, VT)
3065  : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
3066  SDValue Ops[] = { GA, Reg };
3067  return DAG.getMemIntrinsicNode(
3068  PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
3069  MachinePointerInfo::getGOT(DAG.getMachineFunction()), None,
3070  MachineMemOperand::MOLoad);
3071 }
3072 
3073 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3074  SelectionDAG &DAG) const {
3075  EVT PtrVT = Op.getValueType();
3076  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3077  const Constant *C = CP->getConstVal();
3078 
3079  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3080  // The actual address of the GlobalValue is stored in the TOC.
3081  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3082  if (Subtarget.isUsingPCRelativeCalls()) {
3083  SDLoc DL(CP);
3084  EVT Ty = getPointerTy(DAG.getDataLayout());
3085  SDValue ConstPool = DAG.getTargetConstantPool(
3086  C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
3087  return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
3088  }
3089  setUsesTOCBasePtr(DAG);
3090  SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
3091  return getTOCEntry(DAG, SDLoc(CP), GA);
3092  }
3093 
3094  unsigned MOHiFlag, MOLoFlag;
3095  bool IsPIC = isPositionIndependent();
3096  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3097 
3098  if (IsPIC && Subtarget.isSVR4ABI()) {
3099  SDValue GA =
3100  DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
3101  return getTOCEntry(DAG, SDLoc(CP), GA);
3102  }
3103 
3104  SDValue CPIHi =
3105  DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
3106  SDValue CPILo =
3107  DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
3108  return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
3109 }
3110 
3111 // For 64-bit PowerPC, prefer the more compact relative encodings.
3112 // This trades 32 bits per jump table entry for one or two instructions
3113 // at the jump site.
3114 unsigned PPCTargetLowering::getJumpTableEncoding() const {
3115  if (isJumpTableRelative())
3116  return MachineJumpTableInfo::EK_LabelDifference32;
3117 
3118  return TargetLowering::getJumpTableEncoding();
3119 }
3120 
3121 bool PPCTargetLowering::isJumpTableRelative() const {
3122  if (UseAbsoluteJumpTables)
3123  return false;
3124  if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3125  return true;
3126  return TargetLowering::isJumpTableRelative();
3127 }
3128 
3129 SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3130  SelectionDAG &DAG) const {
3131  if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3132  return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3133 
3134  switch (getTargetMachine().getCodeModel()) {
3135  case CodeModel::Small:
3136  case CodeModel::Medium:
3137  return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3138  default:
3139  return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3140  getPointerTy(DAG.getDataLayout()));
3141  }
3142 }
3143 
3144 const MCExpr *
3145 PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
3146  unsigned JTI,
3147  MCContext &Ctx) const {
3148  if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3149  return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3150 
3151  switch (getTargetMachine().getCodeModel()) {
3152  case CodeModel::Small:
3153  case CodeModel::Medium:
3154  return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3155  default:
3156  return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3157  }
3158 }
3159 
3160 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3161  EVT PtrVT = Op.getValueType();
3162  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3163 
3164  // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3165  if (Subtarget.isUsingPCRelativeCalls()) {
3166  SDLoc DL(JT);
3167  EVT Ty = getPointerTy(DAG.getDataLayout());
3168  SDValue GA =
3169  DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
3170  SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3171  return MatAddr;
3172  }
3173 
3174  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3175  // The actual address of the GlobalValue is stored in the TOC.
3176  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3177  setUsesTOCBasePtr(DAG);
3178  SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3179  return getTOCEntry(DAG, SDLoc(JT), GA);
3180  }
3181 
3182  unsigned MOHiFlag, MOLoFlag;
3183  bool IsPIC = isPositionIndependent();
3184  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3185 
3186  if (IsPIC && Subtarget.isSVR4ABI()) {
3187  SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3188  PPCII::MO_PIC_FLAG);
3189  return getTOCEntry(DAG, SDLoc(GA), GA);
3190  }
3191 
3192  SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3193  SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3194  return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3195 }
3196 
3197 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3198  SelectionDAG &DAG) const {
3199  EVT PtrVT = Op.getValueType();
3200  BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3201  const BlockAddress *BA = BASDN->getBlockAddress();
3202 
3203  // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3204  if (Subtarget.isUsingPCRelativeCalls()) {
3205  SDLoc DL(BASDN);
3206  EVT Ty = getPointerTy(DAG.getDataLayout());
3207  SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3208  PPCII::MO_PCREL_FLAG);
3209  SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3210  return MatAddr;
3211  }
3212 
3213  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3214  // The actual BlockAddress is stored in the TOC.
3215  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3216  setUsesTOCBasePtr(DAG);
3217  SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3218  return getTOCEntry(DAG, SDLoc(BASDN), GA);
3219  }
3220 
3221  // 32-bit position-independent ELF stores the BlockAddress in the .got.
3222  if (Subtarget.is32BitELFABI() && isPositionIndependent())
3223  return getTOCEntry(
3224  DAG, SDLoc(BASDN),
3225  DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3226 
3227  unsigned MOHiFlag, MOLoFlag;
3228  bool IsPIC = isPositionIndependent();
3229  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3230  SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3231  SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3232  return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3233 }
3234 
3235 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3236  SelectionDAG &DAG) const {
3237  if (Subtarget.isAIXABI())
3238  return LowerGlobalTLSAddressAIX(Op, DAG);
3239 
3240  return LowerGlobalTLSAddressLinux(Op, DAG);
3241 }
3242 
3243 SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3244  SelectionDAG &DAG) const {
3245  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3246 
3247  if (DAG.getTarget().useEmulatedTLS())
3248  report_fatal_error("Emulated TLS is not yet supported on AIX");
3249 
3250  SDLoc dl(GA);
3251  const GlobalValue *GV = GA->getGlobal();
3252  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3253 
3254  // The general-dynamic model is the only access model supported for now, so
3255  // all the GlobalTLSAddress nodes are lowered with this model.
3256  // We need to generate two TOC entries, one for the variable offset, one for
3257  // the region handle. The global address for the TOC entry of the region
3258  // handle is created with the MO_TLSGDM_FLAG flag and the global address
3259  // for the TOC entry of the variable offset is created with MO_TLSGD_FLAG.
3260  SDValue VariableOffsetTGA =
3261  DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
3262  SDValue RegionHandleTGA =
3263  DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
3264  SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3265  SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3266  return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
3267  RegionHandle);
3268 }
3269 
3270 SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3271  SelectionDAG &DAG) const {
3272  // FIXME: TLS addresses currently use medium model code sequences,
3273  // which is the most useful form. Eventually support for small and
3274  // large models could be added if users need it, at the cost of
3275  // additional complexity.
3276  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3277  if (DAG.getTarget().useEmulatedTLS())
3278  return LowerToTLSEmulatedModel(GA, DAG);
3279 
3280  SDLoc dl(GA);
3281  const GlobalValue *GV = GA->getGlobal();
3282  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3283  bool is64bit = Subtarget.isPPC64();
3284  const Module *M = DAG.getMachineFunction().getFunction().getParent();
3285  PICLevel::Level picLevel = M->getPICLevel();
3286 
3287  const TargetMachine &TM = getTargetMachine();
3288  TLSModel::Model Model = TM.getTLSModel(GV);
3289 
3290  if (Model == TLSModel::LocalExec) {
3291  if (Subtarget.isUsingPCRelativeCalls()) {
3292  SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3293  SDValue TGA = DAG.getTargetGlobalAddress(
3294  GV, dl, PtrVT, 0, (PPCII::MO_PCREL_FLAG | PPCII::MO_TPREL_FLAG));
3295  SDValue MatAddr =
3296  DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3297  return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3298  }
3299 
3300  SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3301  PPCII::MO_TPREL_HA);
3302  SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3303  PPCII::MO_TPREL_LO);
3304  SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3305  : DAG.getRegister(PPC::R2, MVT::i32);
3306 
3307  SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3308  return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3309  }
3310 
3311  if (Model == TLSModel::InitialExec) {
3312  bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3313  SDValue TGA = DAG.getTargetGlobalAddress(
3314  GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3315  SDValue TGATLS = DAG.getTargetGlobalAddress(
3316  GV, dl, PtrVT, 0,
3317  IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
3318  SDValue TPOffset;
3319  if (IsPCRel) {
3320  SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3321  TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3322  MachinePointerInfo());
3323  } else {
3324  SDValue GOTPtr;
3325  if (is64bit) {
3326  setUsesTOCBasePtr(DAG);
3327  SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3328  GOTPtr =
3329  DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3330  } else {
3331  if (!TM.isPositionIndependent())
3332  GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3333  else if (picLevel == PICLevel::SmallPIC)
3334  GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3335  else
3336  GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3337  }
3338  TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3339  }
3340  return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3341  }
3342 
3343  if (Model == TLSModel::GeneralDynamic) {
3344  if (Subtarget.isUsingPCRelativeCalls()) {
3345  SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3346  PPCII::MO_GOT_TLSGD_PCREL_FLAG);
3347  return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3348  }
3349 
3350  SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3351  SDValue GOTPtr;
3352  if (is64bit) {
3353  setUsesTOCBasePtr(DAG);
3354  SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3355  GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3356  GOTReg, TGA);
3357  } else {
3358  if (picLevel == PICLevel::SmallPIC)
3359  GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3360  else
3361  GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3362  }
3363  return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3364  GOTPtr, TGA, TGA);
3365  }
3366 
3367  if (Model == TLSModel::LocalDynamic) {
3368  if (Subtarget.isUsingPCRelativeCalls()) {
3369  SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3370  PPCII::MO_GOT_TLSLD_PCREL_FLAG);
3371  SDValue MatPCRel =
3372  DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3373  return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3374  }
3375 
3376  SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3377  SDValue GOTPtr;
3378  if (is64bit) {
3379  setUsesTOCBasePtr(DAG);
3380  SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3381  GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3382  GOTReg, TGA);
3383  } else {
3384  if (picLevel == PICLevel::SmallPIC)
3385  GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3386  else
3387  GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3388  }
3389  SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3390  PtrVT, GOTPtr, TGA, TGA);
3391  SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3392  PtrVT, TLSAddr, TGA);
3393  return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3394  }
3395 
3396  llvm_unreachable("Unknown TLS model!");
3397 }
3398 
3399 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3400  SelectionDAG &DAG) const {
3401  EVT PtrVT = Op.getValueType();
3402  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3403  SDLoc DL(GSDN);
3404  const GlobalValue *GV = GSDN->getGlobal();
3405 
3406  // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3407  // The actual address of the GlobalValue is stored in the TOC.
3408  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3409  if (Subtarget.isUsingPCRelativeCalls()) {
3410  EVT Ty = getPointerTy(DAG.getDataLayout());
3411  if (isAccessedAsGotIndirect(Op)) {
3412  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3415  SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3416  SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3417  MachinePointerInfo());
3418  return Load;
3419  } else {
3420  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3421  PPCII::MO_PCREL_FLAG);
3422  return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3423  }
3424  }
3425  setUsesTOCBasePtr(DAG);
3426  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3427  return getTOCEntry(DAG, DL, GA);
3428  }
3429 
3430  unsigned MOHiFlag, MOLoFlag;
3431  bool IsPIC = isPositionIndependent();
3432  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3433 
3434  if (IsPIC && Subtarget.isSVR4ABI()) {
3435  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3436  GSDN->getOffset(),
3437  PPCII::MO_PIC_FLAG);
3438  return getTOCEntry(DAG, DL, GA);
3439  }
3440 
3441  SDValue GAHi =
3442  DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3443  SDValue GALo =
3444  DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3445 
3446  return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3447 }
3448 
3449 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3450  bool IsStrict = Op->isStrictFPOpcode();
3451  ISD::CondCode CC =
3452  cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
3453  SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
3454  SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
3455  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
3456  EVT LHSVT = LHS.getValueType();
3457  SDLoc dl(Op);
3458 
3459  // Soften the setcc with libcall if it is fp128.
3460  if (LHSVT == MVT::f128) {
3461  assert(!Subtarget.hasP9Vector() &&
3462  "SETCC for f128 is already legal under Power9!");
3463  softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
3464  Op->getOpcode() == ISD::STRICT_FSETCCS);
3465  if (RHS.getNode())
3466  LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
3467  DAG.getCondCode(CC));
3468  if (IsStrict)
3469  return DAG.getMergeValues({LHS, Chain}, dl);
3470  return LHS;
3471  }
3472 
3473  assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3474 
3475  if (Op.getValueType() == MVT::v2i64) {
3476  // When the operands themselves are v2i64 values, we need to do something
3477  // special because VSX has no underlying comparison operations for these.
3478  if (LHS.getValueType() == MVT::v2i64) {
3479  // Equality can be handled by casting to the legal type for Altivec
3480  // comparisons, everything else needs to be expanded.
3481  if (CC == ISD::SETEQ || CC == ISD::SETNE) {
3482  return DAG.getNode(
3483  ISD::BITCAST, dl, MVT::v2i64,
3484  DAG.getSetCC(dl, MVT::v4i32,
3485  DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
3486  DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC));
3487  }
3488 
3489  return SDValue();
3490  }
3491 
3492  // We handle most of these in the usual way.
3493  return Op;
3494  }
3495 
3496  // If we're comparing for equality to zero, expose the fact that this is
3497  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3498  // fold the new nodes.
3499  if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3500  return V;
3501 
3502  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
3503  // Leave comparisons against 0 and -1 alone for now, since they're usually
3504  // optimized. FIXME: revisit this when we can custom lower all setcc
3505  // optimizations.
3506  if (C->isAllOnesValue() || C->isNullValue())
3507  return SDValue();
3508  }
3509 
3510  // If we have an integer seteq/setne, turn it into a compare against zero
3511  // by xor'ing the rhs with the lhs, which is faster than setting a
3512  // condition register, reading it back out, and masking the correct bit. The
3513  // normal approach here uses sub to do this instead of xor. Using xor exposes
3514  // the result to other bit-twiddling opportunities.
3515  if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3516  EVT VT = Op.getValueType();
3517  SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
3518  return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3519  }
3520  return SDValue();
3521 }
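// Illustrative example (editorial, not from the original source): an integer
// (seteq %a, %b) is rewritten above as setcc((xor %a, %b), 0, eq); the
// compare-against-zero form can then be matched by the ctlz/srl lowering
// mentioned earlier, e.g. cntlzw of the xor shifted right by 5 yields 1 for
// i32 exactly when %a == %b.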
3522 
3523 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3524  SDNode *Node = Op.getNode();
3525  EVT VT = Node->getValueType(0);
3526  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3527  SDValue InChain = Node->getOperand(0);
3528  SDValue VAListPtr = Node->getOperand(1);
3529  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3530  SDLoc dl(Node);
3531 
3532  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3533 
3534  // gpr_index
3535  SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3536  VAListPtr, MachinePointerInfo(SV), MVT::i8);
3537  InChain = GprIndex.getValue(1);
3538 
3539  if (VT == MVT::i64) {
3540  // Check if GprIndex is even
3541  SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3542  DAG.getConstant(1, dl, MVT::i32));
3543  SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3544  DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3545  SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3546  DAG.getConstant(1, dl, MVT::i32));
3547  // Align GprIndex to be even if it isn't
3548  GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3549  GprIndex);
3550  }
3551 
3552  // fpr index is 1 byte after gpr
3553  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3554  DAG.getConstant(1, dl, MVT::i32));
3555 
3556  // fpr
3557  SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3558  FprPtr, MachinePointerInfo(SV), MVT::i8);
3559  InChain = FprIndex.getValue(1);
3560 
3561  SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3562  DAG.getConstant(8, dl, MVT::i32));
3563 
3564  SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3565  DAG.getConstant(4, dl, MVT::i32));
3566 
3567  // areas
3568  SDValue OverflowArea =
3569  DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3570  InChain = OverflowArea.getValue(1);
3571 
3572  SDValue RegSaveArea =
3573  DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3574  InChain = RegSaveArea.getValue(1);
3575 
3576  // select overflow_area if index > 8
3577  SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3578  DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3579 
3580  // adjustment constant gpr_index * 4/8
3581  SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3582  VT.isInteger() ? GprIndex : FprIndex,
3583  DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3584  MVT::i32));
3585 
3586  // OurReg = RegSaveArea + RegConstant
3587  SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3588  RegConstant);
3589 
3590  // Floating types are 32 bytes into RegSaveArea
3591  if (VT.isFloatingPoint())
3592  OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3593  DAG.getConstant(32, dl, MVT::i32));
3594 
3595  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3596  SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3597  VT.isInteger() ? GprIndex : FprIndex,
3598  DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3599  MVT::i32));
3600 
3601  InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3602  VT.isInteger() ? VAListPtr : FprPtr,
3603  MachinePointerInfo(SV), MVT::i8);
3604 
3605  // determine if we should load from reg_save_area or overflow_area
3606  SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3607 
3608  // increase overflow_area by 4/8 if gpr/fpr > 8
3609  SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3610  DAG.getConstant(VT.isInteger() ? 4 : 8,
3611  dl, MVT::i32));
3612 
3613  OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3614  OverflowAreaPlusN);
3615 
3616  InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3617  MachinePointerInfo(), MVT::i32);
3618 
3619  return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3620 }
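// Illustrative note (editorial, not from the original source): for an i64
// vararg on 32-bit SVR4 the code above first rounds gpr_index up to an even
// value, since an i64 argument occupies an aligned GPR pair (r3:r4, r5:r6,
// ...); the index is then bumped by 2 rather than 1 after the load.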
3621 
3622 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3623  assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3624 
3625  // We have to copy the entire va_list struct:
3626  // 2*sizeof(char) + 2 bytes of padding + 2*sizeof(char*) = 12 bytes
3627  return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3628  DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3629  false, true, false, MachinePointerInfo(),
3630  MachinePointerInfo());
3631 }
3632 
3633 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3634  SelectionDAG &DAG) const {
3635  if (Subtarget.isAIXABI())
3636  report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3637 
3638  return Op.getOperand(0);
3639 }
3640 
3641 SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3642  MachineFunction &MF = DAG.getMachineFunction();
3643  PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3644 
3645  assert((Op.getOpcode() == ISD::INLINEASM ||
3646  Op.getOpcode() == ISD::INLINEASM_BR) &&
3647  "Expecting Inline ASM node.");
3648 
3649  // If an LR store is already known to be required then there is no point in
3650  // checking this ASM as well.
3651  if (MFI.isLRStoreRequired())
3652  return Op;
3653 
3654  // Inline ASM nodes have an optional last operand that is an incoming Flag of
3655  // type MVT::Glue. We want to ignore this last operand if that is the case.
3656  unsigned NumOps = Op.getNumOperands();
3657  if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
3658  --NumOps;
3659 
3660  // Check all operands that may contain the LR.
3661  for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3662  unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue();
3663  unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
3664  ++i; // Skip the ID value.
3665 
3666  switch (InlineAsm::getKind(Flags)) {
3667  default:
3668  llvm_unreachable("Bad flags!");
3670  case InlineAsm::Kind_Imm:
3671  case InlineAsm::Kind_Mem:
3672  i += NumVals;
3673  break;
3677  for (; NumVals; --NumVals, ++i) {
3678  Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
3679  if (Reg != PPC::LR && Reg != PPC::LR8)
3680  continue;
3681  MFI.setLRStoreRequired();
3682  return Op;
3683  }
3684  break;
3685  }
3686  }
3687  }
3688 
3689  return Op;
3690 }
3691 
3692 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3693  SelectionDAG &DAG) const {
3694  if (Subtarget.isAIXABI())
3695  report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3696 
3697  SDValue Chain = Op.getOperand(0);
3698  SDValue Trmp = Op.getOperand(1); // trampoline
3699  SDValue FPtr = Op.getOperand(2); // nested function
3700  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3701  SDLoc dl(Op);
3702 
3703  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3704  bool isPPC64 = (PtrVT == MVT::i64);
3705  Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3706 
3707  TargetLowering::ArgListTy Args;
3708  TargetLowering::ArgListEntry Entry;
3709 
3710  Entry.Ty = IntPtrTy;
3711  Entry.Node = Trmp; Args.push_back(Entry);
3712 
3713  // TrampSize == (isPPC64 ? 48 : 40);
3714  Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
3715  isPPC64 ? MVT::i64 : MVT::i32);
3716  Args.push_back(Entry);
3717 
3718  Entry.Node = FPtr; Args.push_back(Entry);
3719  Entry.Node = Nest; Args.push_back(Entry);
3720 
3721  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3722  TargetLowering::CallLoweringInfo CLI(DAG);
3723  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3724  CallingConv::C, Type::getVoidTy(*DAG.getContext()),
3725  DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3726 
3727  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3728  return CallResult.second;
3729 }
3730 
3731 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3732  MachineFunction &MF = DAG.getMachineFunction();
3733  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3734  EVT PtrVT = getPointerTy(MF.getDataLayout());
3735 
3736  SDLoc dl(Op);
3737 
3738  if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
3739  // vastart just stores the address of the VarArgsFrameIndex slot into the
3740  // memory location argument.
3741  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3742  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3743  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3744  MachinePointerInfo(SV));
3745  }
3746 
3747  // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3748  // We suppose the given va_list is already allocated.
3749  //
3750  // typedef struct {
3751  // char gpr; /* index into the array of 8 GPRs
3752  // * stored in the register save area
3753  // * gpr=0 corresponds to r3,
3754  // * gpr=1 to r4, etc.
3755  // */
3756  // char fpr; /* index into the array of 8 FPRs
3757  // * stored in the register save area
3758  // * fpr=0 corresponds to f1,
3759  // * fpr=1 to f2, etc.
3760  // */
3761  // char *overflow_arg_area;
3762  // /* location on stack that holds
3763  // * the next overflow argument
3764  // */
3765  // char *reg_save_area;
3766  // /* where r3:r10 and f1:f8 (if saved)
3767  // * are stored
3768  // */
3769  // } va_list[1];
3770 
3771  SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3772  SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3773  SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3774  PtrVT);
3775  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3776  PtrVT);
3777 
3778  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3779  SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3780 
3781  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3782  SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3783 
3784  uint64_t FPROffset = 1;
3785  SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3786 
3787  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3788 
3789  // Store first byte : number of int regs
3790  SDValue firstStore =
3791  DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3792  MachinePointerInfo(SV), MVT::i8);
3793  uint64_t nextOffset = FPROffset;
3794  SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3795  ConstFPROffset);
3796 
3797  // Store second byte : number of float regs
3798  SDValue secondStore =
3799  DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
3800  MachinePointerInfo(SV, nextOffset), MVT::i8);
3801  nextOffset += StackOffset;
3802  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
3803 
3804  // Store second word : arguments given on stack
3805  SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
3806  MachinePointerInfo(SV, nextOffset));
3807  nextOffset += FrameOffset;
3808  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
3809 
3810  // Store third word : arguments given in registers
3811  return DAG.getStore(thirdStore, dl, FR, nextPtr,
3812  MachinePointerInfo(SV, nextOffset));
3813 }
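// For illustration, the four stores above fill in the 32-bit SVR4 va_list at
// these byte offsets (following the FPROffset/StackOffset/FrameOffset values
// computed above):
//   +0  gpr               - i8 truncating store of VarArgsNumGPR
//   +1  fpr               - i8 truncating store of VarArgsNumFPR
//   +4  overflow_arg_area - frame index of the first stack-passed vararg
//   +8  reg_save_area     - frame index of the GPR/FPR register save area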
3814 
3815 /// FPR - The set of FP registers that should be allocated for arguments
3816 /// on Darwin and AIX.
3817 static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
3818  PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
3819  PPC::F11, PPC::F12, PPC::F13};
3820 
3821 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
3822 /// the stack.
3823 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3824  unsigned PtrByteSize) {
3825  unsigned ArgSize = ArgVT.getStoreSize();
3826  if (Flags.isByVal())
3827  ArgSize = Flags.getByValSize();
3828 
3829  // Round up to multiples of the pointer size, except for array members,
3830  // which are always packed.
3831  if (!Flags.isInConsecutiveRegs())
3832  ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3833 
3834  return ArgSize;
3835 }
3836 
3837 /// CalculateStackSlotAlignment - Calculates the alignment of this argument
3838 /// on the stack.
3839 static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
3840  ISD::ArgFlagsTy Flags,
3841  unsigned PtrByteSize) {
3842  Align Alignment(PtrByteSize);
3843 
3844  // Altivec parameters are padded to a 16 byte boundary.
3845  if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3846  ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3847  ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3848  ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3849  Alignment = Align(16);
3850 
3851  // ByVal parameters are aligned as requested.
3852  if (Flags.isByVal()) {
3853  auto BVAlign = Flags.getNonZeroByValAlign();
3854  if (BVAlign > PtrByteSize) {
3855  if (BVAlign.value() % PtrByteSize != 0)
3856  report_fatal_error(
3857  "ByVal alignment is not a multiple of the pointer size");
3858 
3859  Alignment = BVAlign;
3860  }
3861  }
3862 
3863  // Array members are always packed to their original alignment.
3864  if (Flags.isInConsecutiveRegs()) {
3865  // If the array member was split into multiple registers, the first
3866  // needs to be aligned to the size of the full type. (Except for
3867  // ppcf128, which is only aligned as its f64 components.)
3868  if (Flags.isSplit() && OrigVT != MVT::ppcf128)
3869  Alignment = Align(OrigVT.getStoreSize());
3870  else
3871  Alignment = Align(ArgVT.getStoreSize());
3872  }
3873 
3874  return Alignment;
3875 }
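// For illustration, with PtrByteSize == 8 (64-bit ELF): a plain i64 or f64
// argument yields Align(8); any of the listed vector types or f128 yields
// Align(16); a byval aggregate requesting 32-byte alignment yields Align(32);
// and the first piece of a split consecutive-register array member is aligned
// to the store size of the full original type (except ppcf128).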
3876 
3877 /// CalculateStackSlotUsed - Return whether this argument will use its
3878 /// stack slot (instead of being passed in registers). ArgOffset,
3879 /// AvailableFPRs, and AvailableVRs must hold the current argument
3880 /// position, and will be updated to account for this argument.
3881 static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
3882  unsigned PtrByteSize, unsigned LinkageSize,
3883  unsigned ParamAreaSize, unsigned &ArgOffset,
3884  unsigned &AvailableFPRs,
3885  unsigned &AvailableVRs) {
3886  bool UseMemory = false;
3887 
3888  // Respect alignment of argument on the stack.
3889  Align Alignment =
3890  CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
3891  ArgOffset = alignTo(ArgOffset, Alignment);
3892  // If there's no space left in the argument save area, we must
3893  // use memory (this check also catches zero-sized arguments).
3894  if (ArgOffset >= LinkageSize + ParamAreaSize)
3895  UseMemory = true;
3896 
3897  // Allocate argument on the stack.
3898  ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
3899  if (Flags.isInConsecutiveRegsLast())
3900  ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3901  // If we overran the argument save area, we must use memory
3902  // (this check catches arguments passed partially in memory)
3903  if (ArgOffset > LinkageSize + ParamAreaSize)
3904  UseMemory = true;
3905 
3906  // However, if the argument is actually passed in an FPR or a VR,
3907  // we don't use memory after all.
3908  if (!Flags.isByVal()) {
3909  if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
3910  if (AvailableFPRs > 0) {
3911  --AvailableFPRs;
3912  return false;
3913  }
3914  if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3915  ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3916  ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3917  ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3918  if (AvailableVRs > 0) {
3919  --AvailableVRs;
3920  return false;
3921  }
3922  }
3923 
3924  return UseMemory;
3925 }
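// For illustration, assuming the ELFv2 64-bit layout (LinkageSize == 32,
// ParamAreaSize == 8 GPRs * 8 == 64): the ninth i64 argument starts at offset
// 96 == LinkageSize + ParamAreaSize, so it reports true and is taken from its
// stack slot. An f64 in the same position still reports false while an FPR
// remains available, because non-byval FP and vector arguments are satisfied
// from AvailableFPRs/AvailableVRs first.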
3926 
3927 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
3928 /// ensure minimum alignment required for target.
3929 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
3930  unsigned NumBytes) {
3931  return alignTo(NumBytes, Lowering->getStackAlign());
3932 }
3933 
3934 SDValue PPCTargetLowering::LowerFormalArguments(
3935  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3936  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3937  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3938  if (Subtarget.isAIXABI())
3939  return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
3940  InVals);
3941  if (Subtarget.is64BitELFABI())
3942  return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3943  InVals);
3944  assert(Subtarget.is32BitELFABI());
3945  return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3946  InVals);
3947 }
3948 
3949 SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
3950  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3951  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3952  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3953 
3954  // 32-bit SVR4 ABI Stack Frame Layout:
3955  // +-----------------------------------+
3956  // +--> | Back chain |
3957  // | +-----------------------------------+
3958  // | | Floating-point register save area |
3959  // | +-----------------------------------+
3960  // | | General register save area |
3961  // | +-----------------------------------+
3962  // | | CR save word |
3963  // | +-----------------------------------+
3964  // | | VRSAVE save word |
3965  // | +-----------------------------------+
3966  // | | Alignment padding |
3967  // | +-----------------------------------+
3968  // | | Vector register save area |
3969  // | +-----------------------------------+
3970  // | | Local variable space |
3971  // | +-----------------------------------+
3972  // | | Parameter list area |
3973  // | +-----------------------------------+
3974  // | | LR save word |
3975  // | +-----------------------------------+
3976  // SP--> +--- | Back chain |
3977  // +-----------------------------------+
3978  //
3979  // Specifications:
3980  // System V Application Binary Interface PowerPC Processor Supplement
3981  // AltiVec Technology Programming Interface Manual
3982 
3983  MachineFunction &MF = DAG.getMachineFunction();
3984  MachineFrameInfo &MFI = MF.getFrameInfo();
3985  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3986 
3987  EVT PtrVT = getPointerTy(MF.getDataLayout());
3988  // Potential tail calls could cause overwriting of argument stack slots.
3989  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3990  (CallConv == CallingConv::Fast));
3991  const Align PtrAlign(4);
3992 
3993  // Assign locations to all of the incoming arguments.
3994  SmallVector<CCValAssign, 16> ArgLocs;
3995  PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3996  *DAG.getContext());
3997 
3998  // Reserve space for the linkage area on the stack.
3999  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4000  CCInfo.AllocateStack(LinkageSize, PtrAlign);
4001  if (useSoftFloat())
4002  CCInfo.PreAnalyzeFormalArguments(Ins);
4003 
4004  CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
4005  CCInfo.clearWasPPCF128();
4006 
4007  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4008  CCValAssign &VA = ArgLocs[i];
4009 
4010  // Arguments stored in registers.
4011  if (VA.isRegLoc()) {
4012  const TargetRegisterClass *RC;
4013  EVT ValVT = VA.getValVT();
4014 
4015  switch (ValVT.getSimpleVT().SimpleTy) {
4016  default:
4017  llvm_unreachable("ValVT not supported by formal arguments Lowering");
4018  case MVT::i1:
4019  case MVT::i32:
4020  RC = &PPC::GPRCRegClass;
4021  break;
4022  case MVT::f32:
4023  if (Subtarget.hasP8Vector())
4024  RC = &PPC::VSSRCRegClass;
4025  else if (Subtarget.hasSPE())
4026  RC = &PPC::GPRCRegClass;
4027  else
4028  RC = &PPC::F4RCRegClass;
4029  break;
4030  case MVT::f64:
4031  if (Subtarget.hasVSX())
4032  RC = &PPC::VSFRCRegClass;
4033  else if (Subtarget.hasSPE())
4034  // SPE passes doubles in GPR pairs.
4035  RC = &PPC::GPRCRegClass;
4036  else
4037  RC = &PPC::F8RCRegClass;
4038  break;
4039  case MVT::v16i8:
4040  case MVT::v8i16:
4041  case MVT::v4i32:
4042  RC = &PPC::VRRCRegClass;
4043  break;
4044  case MVT::v4f32:
4045  RC = &PPC::VRRCRegClass;
4046  break;
4047  case MVT::v2f64:
4048  case MVT::v2i64:
4049  RC = &PPC::VRRCRegClass;
4050  break;
4051  }
4052 
4053  SDValue ArgValue;
4054  // Transform the arguments stored in physical registers into
4055  // virtual ones.
4056  if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
4057  assert(i + 1 < e && "No second half of double precision argument");
4058  unsigned RegLo = MF.addLiveIn(VA.getLocReg(), RC);
4059  unsigned RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
4060  SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
4061  SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
4062  if (!Subtarget.isLittleEndian())
4063  std::swap (ArgValueLo, ArgValueHi);
4064  ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
4065  ArgValueHi);
4066  } else {
4067  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
4068  ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4069  ValVT == MVT::i1 ? MVT::i32 : ValVT);
4070  if (ValVT == MVT::i1)
4071  ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
4072  }
4073 
4074  InVals.push_back(ArgValue);
4075  } else {
4076  // Argument stored in memory.
4077  assert(VA.isMemLoc());
4078 
4079  // Get the extended size of the argument type on the stack
4080  unsigned ArgSize = VA.getLocVT().getStoreSize();
4081  // Get the actual size of the argument type
4082  unsigned ObjSize = VA.getValVT().getStoreSize();
4083  unsigned ArgOffset = VA.getLocMemOffset();
4084  // Stack objects in PPC32 are right justified.
4085  ArgOffset += ArgSize - ObjSize;
4086  int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
4087 
4088  // Create load nodes to retrieve arguments from the stack.
4089  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4090  InVals.push_back(
4091  DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
4092  }
4093  }
4094 
4095  // Assign locations to all of the incoming aggregate by value arguments.
4096  // Aggregates passed by value are stored in the local variable space of the
4097  // caller's stack frame, right above the parameter list area.
4098  SmallVector<CCValAssign, 16> ByValArgLocs;
4099  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4100  ByValArgLocs, *DAG.getContext());
4101 
4102  // Reserve stack space for the allocations in CCInfo.
4103  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
4104 
4105  CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
4106 
4107  // Area that is at least reserved in the caller of this function.
4108  unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
4109  MinReservedArea = std::max(MinReservedArea, LinkageSize);
4110 
4111  // Set the size that is at least reserved in caller of this function. Tail
4112  // call optimized function's reserved stack space needs to be aligned so that
4113  // taking the difference between two stack areas will result in an aligned
4114  // stack.
4115  MinReservedArea =
4116  EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4117  FuncInfo->setMinReservedArea(MinReservedArea);
4118 
4119  SmallVector<SDValue, 8> MemOps;
4120 
4121  // If the function takes variable number of arguments, make a frame index for
4122  // the start of the first vararg value... for expansion of llvm.va_start.
4123  if (isVarArg) {
4124  static const MCPhysReg GPArgRegs[] = {
4125  PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4126  PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4127  };
4128  const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
4129 
4130  static const MCPhysReg FPArgRegs[] = {
4131  PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4132  PPC::F8
4133  };
4134  unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
4135 
4136  if (useSoftFloat() || hasSPE())
4137  NumFPArgRegs = 0;
4138 
4139  FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
4140  FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
4141 
4142  // Make room for NumGPArgRegs and NumFPArgRegs.
4143  int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
4144  NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
4145 
4146  FuncInfo->setVarArgsStackOffset(
4147  MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
4148  CCInfo.getNextStackOffset(), true));
4149 
4150  FuncInfo->setVarArgsFrameIndex(
4151  MFI.CreateStackObject(Depth, Align(8), false));
4152  SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4153 
4154  // The fixed integer arguments of a variadic function are stored to the
4155  // VarArgsFrameIndex on the stack so that they may be loaded by
4156  // dereferencing the result of va_next.
4157  for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
4158  // Get an existing live-in vreg, or add a new one.
4159  unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
4160  if (!VReg)
4161  VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
4162 
4163  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4164  SDValue Store =
4165  DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4166  MemOps.push_back(Store);
4167  // Increment the address by four for the next argument to store
4168  SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4169  FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4170  }
4171 
4172  // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4173  // is set.
4174  // The double arguments are stored to the VarArgsFrameIndex
4175  // on the stack.
4176  for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4177  // Get an existing live-in vreg, or add a new one.
4178  unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4179  if (!VReg)
4180  VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4181 
4182  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4183  SDValue Store =
4184  DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4185  MemOps.push_back(Store);
4186  // Increment the address by eight for the next argument to store
4187  SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4188  PtrVT);
4189  FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4190  }
4191  }
4192 
4193  if (!MemOps.empty())
4194  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4195 
4196  return Chain;
4197 }
4198 
4199 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4200 // value to MVT::i64 and then truncate to the correct register size.
4201 SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4202  EVT ObjectVT, SelectionDAG &DAG,
4203  SDValue ArgVal,
4204  const SDLoc &dl) const {
4205  if (Flags.isSExt())
4206  ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4207  DAG.getValueType(ObjectVT));
4208  else if (Flags.isZExt())
4209  ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4210  DAG.getValueType(ObjectVT));
4211 
4212  return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4213 }
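// For illustration: a formal argument declared 'signext i32' arrives in a full
// 64-bit GPR; the helper above wraps it as AssertSext(i64 value, i32) and then
// truncates to i32, so later combines may assume the upper word is already the
// sign extension of the lower one. A 'zeroext' argument takes the AssertZext
// path instead.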
4214 
4215 SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4216  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4217  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4218  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4219  // TODO: add description of PPC stack frame format, or at least some docs.
4220  //
4221  bool isELFv2ABI = Subtarget.isELFv2ABI();
4222  bool isLittleEndian = Subtarget.isLittleEndian();
4223  MachineFunction &MF = DAG.getMachineFunction();
4224  MachineFrameInfo &MFI = MF.getFrameInfo();
4225  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4226 
4227  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4228  "fastcc not supported on varargs functions");
4229 
4230  EVT PtrVT = getPointerTy(MF.getDataLayout());
4231  // Potential tail calls could cause overwriting of argument stack slots.
4232  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4233  (CallConv == CallingConv::Fast));
4234  unsigned PtrByteSize = 8;
4235  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4236 
4237  static const MCPhysReg GPR[] = {
4238  PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4239  PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4240  };
4241  static const MCPhysReg VR[] = {
4242  PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4243  PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4244  };
4245 
4246  const unsigned Num_GPR_Regs = array_lengthof(GPR);
4247  const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4248  const unsigned Num_VR_Regs = array_lengthof(VR);
4249 
4250  // Do a first pass over the arguments to determine whether the ABI
4251  // guarantees that our caller has allocated the parameter save area
4252  // on its stack frame. In the ELFv1 ABI, this is always the case;
4253  // in the ELFv2 ABI, it is true if this is a vararg function or if
4254  // any parameter is located in a stack slot.
4255 
4256  bool HasParameterArea = !isELFv2ABI || isVarArg;
4257  unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4258  unsigned NumBytes = LinkageSize;
4259  unsigned AvailableFPRs = Num_FPR_Regs;
4260  unsigned AvailableVRs = Num_VR_Regs;
4261  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4262  if (Ins[i].Flags.isNest())
4263  continue;
4264 
4265  if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
4266  PtrByteSize, LinkageSize, ParamAreaSize,
4267  NumBytes, AvailableFPRs, AvailableVRs))
4268  HasParameterArea = true;
4269  }
4270 
4271  // Add DAG nodes to load the arguments or copy them out of registers. On
4272  // entry to a function on PPC, the arguments start after the linkage area,
4273  // although the first ones are often in registers.
4274 
4275  unsigned ArgOffset = LinkageSize;
4276  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4277  SmallVector<SDValue, 8> MemOps;
4278  Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4279  unsigned CurArgIdx = 0;
4280  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4281  SDValue ArgVal;
4282  bool needsLoad = false;
4283  EVT ObjectVT = Ins[ArgNo].VT;
4284  EVT OrigVT = Ins[ArgNo].ArgVT;
4285  unsigned ObjSize = ObjectVT.getStoreSize();
4286  unsigned ArgSize = ObjSize;
4287  ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4288  if (Ins[ArgNo].isOrigArg()) {
4289  std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4290  CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4291  }
4292  // We re-align the argument offset for each argument, except when using the
4293  // fast calling convention, when we need to make sure we do that only when
4294  // we'll actually use a stack slot.
4295  unsigned CurArgOffset;
4296  Align Alignment;
4297  auto ComputeArgOffset = [&]() {
4298  /* Respect alignment of argument on the stack. */
4299  Alignment =
4300  CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4301  ArgOffset = alignTo(ArgOffset, Alignment);
4302  CurArgOffset = ArgOffset;
4303  };
4304 
4305  if (CallConv != CallingConv::Fast) {
4306  ComputeArgOffset();
4307 
4308  /* Compute GPR index associated with argument offset. */
4309  GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4310  GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4311  }
4312 
4313  // FIXME the codegen can be much improved in some cases.
4314  // We do not have to keep everything in memory.
4315  if (Flags.isByVal()) {
4316  assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4317 
4318  if (CallConv == CallingConv::Fast)
4319  ComputeArgOffset();
4320 
4321  // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of registers.
4322  ObjSize = Flags.getByValSize();
4323  ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4324  // Empty aggregate parameters do not take up registers. Examples:
4325  // struct { } a;
4326  // union { } b;
4327  // int c[0];
4328  // etc. However, we have to provide a place-holder in InVals, so
4329  // pretend we have an 8-byte item at the current address for that
4330  // purpose.
4331  if (!ObjSize) {
4332  int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4333  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4334  InVals.push_back(FIN);
4335  continue;
4336  }
4337 
4338  // Create a stack object covering all stack doublewords occupied
4339  // by the argument. If the argument is (fully or partially) on
4340  // the stack, or if the argument is fully in registers but the
4341  // caller has allocated the parameter save anyway, we can refer
4342  // directly to the caller's stack frame. Otherwise, create a
4343  // local copy in our own frame.
4344  int FI;
4345  if (HasParameterArea ||
4346  ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4347  FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4348  else
4349  FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4350  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4351 
4352  // Handle aggregates smaller than 8 bytes.
4353  if (ObjSize < PtrByteSize) {
4354  // The value of the object is its address, which differs from the
4355  // address of the enclosing doubleword on big-endian systems.
4356  SDValue Arg = FIN;
4357  if (!isLittleEndian) {
4358  SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4359  Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4360  }
4361  InVals.push_back(Arg);
4362 
4363  if (GPR_idx != Num_GPR_Regs) {
4364  unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4365  FuncInfo->addLiveInAttr(VReg, Flags);
4366  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4367  SDValue Store;
4368 
4369  if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
4370  EVT ObjType = (ObjSize == 1 ? MVT::i8 :
4371  (ObjSize == 2 ? MVT::i16 : MVT::i32));
4372  Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4373  MachinePointerInfo(&*FuncArg), ObjType);
4374  } else {
4375  // For sizes that don't fit a truncating store (3, 5, 6, 7),
4376  // store the whole register as-is to the parameter save area
4377  // slot.
4378  Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4379  MachinePointerInfo(&*FuncArg));
4380  }
4381 
4382  MemOps.push_back(Store);
4383  }
4384  // Whether we copied from a register or not, advance the offset
4385  // into the parameter save area by a full doubleword.
4386  ArgOffset += PtrByteSize;
4387  continue;
4388  }
4389 
4390  // The value of the object is its address, which is the address of
4391  // its first stack doubleword.
4392  InVals.push_back(FIN);
4393 
4394  // Store whatever pieces of the object are in registers to memory.
4395  for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4396  if (GPR_idx == Num_GPR_Regs)
4397  break;
4398 
4399  unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4400  FuncInfo->addLiveInAttr(VReg, Flags);
4401  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4402  SDValue Addr = FIN;
4403  if (j) {
4404  SDValue Off = DAG.getConstant(j, dl, PtrVT);
4405  Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4406  }
4407  SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
4408  MachinePointerInfo(&*FuncArg, j));
4409  MemOps.push_back(Store);
4410  ++GPR_idx;
4411  }
4412  ArgOffset += ArgSize;
4413  continue;
4414  }
4415 
4416  switch (ObjectVT.getSimpleVT().SimpleTy) {
4417  default: llvm_unreachable("Unhandled argument type!");
4418  case MVT::i1:
4419  case MVT::i32:
4420  case MVT::i64:
4421  if (Flags.isNest()) {
4422  // The 'nest' parameter, if any, is passed in R11.
4423  unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4424  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4425 
4426  if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4427  ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4428 
4429  break;
4430  }
4431 
4432  // These can be scalar arguments or elements of an integer array type
4433  // passed directly. Clang may use those instead of "byval" aggregate
4434  // types to avoid forcing arguments to memory unnecessarily.
4435  if (GPR_idx != Num_GPR_Regs) {
4436  unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4437  FuncInfo->addLiveInAttr(VReg, Flags);
4438  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4439 
4440  if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4441  // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4442  // value to MVT::i64 and then truncate to the correct register size.
4443  ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4444  } else {
4445  if (CallConv == CallingConv::Fast)
4446  ComputeArgOffset();
4447 
4448  needsLoad = true;
4449  ArgSize = PtrByteSize;
4450  }
4451  if (CallConv != CallingConv::Fast || needsLoad)
4452  ArgOffset += 8;
4453  break;
4454 
4455  case MVT::f32:
4456  case MVT::f64:
4457  // These can be scalar arguments or elements of a float array type
4458  // passed directly. The latter are used to implement ELFv2 homogenous
4459  // float aggregates.
4460  if (FPR_idx != Num_FPR_Regs) {
4461  unsigned VReg;
4462 
4463  if (ObjectVT == MVT::f32)
4464  VReg = MF.addLiveIn(FPR[FPR_idx],
4465  Subtarget.hasP8Vector()
4466  ? &PPC::VSSRCRegClass
4467  : &PPC::F4RCRegClass);
4468  else
4469  VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4470  ? &PPC::VSFRCRegClass
4471  : &PPC::F8RCRegClass);
4472 
4473  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4474  ++FPR_idx;
4475  } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4476  // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4477  // once we support fp <-> gpr moves.
4478 
4479  // This can only ever happen in the presence of f32 array types,
4480  // since otherwise we never run out of FPRs before running out
4481  // of GPRs.
4482  unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4483  FuncInfo->addLiveInAttr(VReg, Flags);
4484  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4485 
4486  if (ObjectVT == MVT::f32) {
4487  if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4488  ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4489  DAG.getConstant(32, dl, MVT::i32));
4490  ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4491  }
4492 
4493  ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4494  } else {
4495  if (CallConv == CallingConv::Fast)
4496  ComputeArgOffset();
4497 
4498  needsLoad = true;
4499  }
4500 
4501  // When passing an array of floats, the array occupies consecutive
4502  // space in the argument area; only round up to the next doubleword
4503  // at the end of the array. Otherwise, each float takes 8 bytes.
4504  if (CallConv != CallingConv::Fast || needsLoad) {
4505  ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4506  ArgOffset += ArgSize;
4507  if (Flags.isInConsecutiveRegsLast())
4508  ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4509  }
4510  break;
4511  case MVT::v4f32:
4512  case MVT::v4i32:
4513  case MVT::v8i16:
4514  case MVT::v16i8:
4515  case MVT::v2f64:
4516  case MVT::v2i64:
4517  case MVT::v1i128:
4518  case MVT::f128:
4519  // These can be scalar arguments or elements of a vector array type
4520  // passed directly. The latter are used to implement ELFv2 homogenous
4521  // vector aggregates.
4522  if (VR_idx != Num_VR_Regs) {
4523  unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4524  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4525  ++VR_idx;
4526  } else {
4527  if (CallConv == CallingConv::Fast)
4528  ComputeArgOffset();
4529  needsLoad = true;
4530  }
4531  if (CallConv != CallingConv::Fast || needsLoad)
4532  ArgOffset += 16;
4533  break;
4534  }
4535 
4536  // We need to load the argument to a virtual register if we determined
4537  // above that we ran out of physical registers of the appropriate type.
4538  if (needsLoad) {
4539  if (ObjSize < ArgSize && !isLittleEndian)
4540  CurArgOffset += ArgSize - ObjSize;
4541  int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4542  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4543  ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4544  }
4545 
4546  InVals.push_back(ArgVal);
4547  }
4548 
4549  // Area that is at least reserved in the caller of this function.
4550  unsigned MinReservedArea;
4551  if (HasParameterArea)
4552  MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4553  else
4554  MinReservedArea = LinkageSize;
4555 
4556  // Set the size that is at least reserved in caller of this function. Tail
4557  // call optimized functions' reserved stack space needs to be aligned so that
4558  // taking the difference between two stack areas will result in an aligned
4559  // stack.
4560  MinReservedArea =
4561  EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4562  FuncInfo->setMinReservedArea(MinReservedArea);
4563 
4564  // If the function takes variable number of arguments, make a frame index for
4565  // the start of the first vararg value... for expansion of llvm.va_start.
4566  // The ELFv2 ABI spec states:
4567  // C programs that are intended to be *portable* across different compilers
4568  // and architectures must use the header file <stdarg.h> to deal with variable
4569  // argument lists.
4570  if (isVarArg && MFI.hasVAStart()) {
4571  int Depth = ArgOffset;
4572 
4573  FuncInfo->setVarArgsFrameIndex(
4574  MFI.CreateFixedObject(PtrByteSize, Depth, true));
4575  SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4576 
4577  // If this function is vararg, store any remaining integer argument regs
4578  // to their spots on the stack so that they may be loaded by dereferencing
4579  // the result of va_next.
4580  for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4581  GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4582  unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4583  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4584  SDValue Store =
4585  DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4586  MemOps.push_back(Store);
4587  // Increment the address by four for the next argument to store
4588  SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4589  FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4590  }
4591  }
4592 
4593  if (!MemOps.empty())
4594  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4595 
4596  return Chain;
4597 }
4598 
4599 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4600 /// adjusted to accommodate the arguments for the tailcall.
4601 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4602  unsigned ParamSize) {
4603 
4604  if (!isTailCall) return 0;
4605 
4606  PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4607  unsigned CallerMinReservedArea = FI->getMinReservedArea();
4608  int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4609  // Remember only if the new adjustment is bigger.
4610  if (SPDiff < FI->getTailCallSPDelta())
4611  FI->setTailCallSPDelta(SPDiff);
4612 
4613  return SPDiff;
4614 }
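// For illustration (hypothetical sizes): if the caller reserved 112 bytes of
// argument area but the tail-called function needs 144, SPDiff is
// 112 - 144 = -32, i.e. the stack pointer must move down by 32 bytes, and the
// most negative adjustment seen so far is remembered in TailCallSPDelta.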
4615 
4616 static bool isFunctionGlobalAddress(SDValue Callee);
4617 
4618 static bool callsShareTOCBase(const Function *Caller, SDValue Callee,
4619  const TargetMachine &TM) {
4620  // It does not make sense to call callsShareTOCBase() with a caller that
4621  // is PC Relative since PC Relative callers do not have a TOC.
4622 #ifndef NDEBUG
4623  const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4624  assert(!STICaller->isUsingPCRelativeCalls() &&
4625  "PC Relative callers do not have a TOC and cannot share a TOC Base");
4626 #endif
4627 
4628  // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4629  // don't have enough information to determine if the caller and callee share
4630  // the same TOC base, so we have to pessimistically assume they don't for
4631  // correctness.
4632  GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4633  if (!G)
4634  return false;
4635 
4636  const GlobalValue *GV = G->getGlobal();
4637 
4638  // If the callee is preemptable, then the static linker will use a plt-stub
4639  // which saves the toc to the stack, and needs a nop after the call
4640  // instruction to convert to a toc-restore.
4641  if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
4642  return false;
4643 
4644  // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4645  // We may need a TOC restore in the situation where the caller requires a
4646  // valid TOC but the callee is PC Relative and does not.
4647  const Function *F = dyn_cast<Function>(GV);
4648  const GlobalAlias *Alias = dyn_cast<GlobalAlias>(GV);
4649 
4650  // If we have an Alias we can try to get the function from there.
4651  if (Alias) {
4652  const GlobalObject *GlobalObj = Alias->getBaseObject();
4653  F = dyn_cast<Function>(GlobalObj);
4654  }
4655 
4656  // If we still have no valid function pointer we do not have enough
4657  // information to determine if the callee uses PC Relative calls so we must
4658  // assume that it does.
4659  if (!F)
4660  return false;
4661 
4662  // If the callee uses PC Relative we cannot guarantee that the callee won't
4663  // clobber the TOC of the caller and so we must assume that the two
4664  // functions do not share a TOC base.
4665  const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4666  if (STICallee->isUsingPCRelativeCalls())
4667  return false;
4668 
4669  // If the GV is not a strong definition then we need to assume it can be
4670  // replaced by another function at link time. The function that replaces
4671  // it may not share the same TOC as the caller since the callee may be
4672  // replaced by a PC Relative version of the same function.
4673  if (!GV->isStrongDefinitionForLinker())
4674  return false;
4675 
4676  // The medium and large code models are expected to provide a sufficiently
4677  // large TOC to satisfy all data addressing needs of a module with a
4678  // single TOC.
4679  if (CodeModel::Medium == TM.getCodeModel() ||
4680  CodeModel::Large == TM.getCodeModel())
4681  return true;
4682 
4683  // Any explicitly-specified sections and section prefixes must also match.
4684  // Also, if we're using -ffunction-sections, then each function is always in
4685  // a different section (the same is true for COMDAT functions).
4686  if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
4687  GV->getSection() != Caller->getSection())
4688  return false;
4689  if (const auto *F = dyn_cast<Function>(GV)) {
4690  if (F->getSectionPrefix() != Caller->getSectionPrefix())
4691  return false;
4692  }
4693 
4694  return true;
4695 }
4696 
4697 static bool
4698 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4699  const SmallVectorImpl<ISD::OutputArg> &Outs) {
4700  assert(Subtarget.is64BitELFABI());
4701 
4702  const unsigned PtrByteSize = 8;
4703  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4704 
4705  static const MCPhysReg GPR[] = {
4706  PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4707  PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4708  };
4709  static const MCPhysReg VR[] = {
4710  PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4711  PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4712  };
4713 
4714  const unsigned NumGPRs = array_lengthof(GPR);
4715  const unsigned NumFPRs = 13;
4716  const unsigned NumVRs = array_lengthof(VR);
4717  const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4718 
4719  unsigned NumBytes = LinkageSize;
4720  unsigned AvailableFPRs = NumFPRs;
4721  unsigned AvailableVRs = NumVRs;
4722 
4723  for (const ISD::OutputArg& Param : Outs) {
4724  if (Param.Flags.isNest()) continue;
4725 
4726  if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
4727  LinkageSize, ParamAreaSize, NumBytes,
4728  AvailableFPRs, AvailableVRs))
4729  return true;
4730  }
4731  return false;
4732 }
4733 
4734 static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4735  if (CB.arg_size() != CallerFn->arg_size())
4736  return false;
4737 
4738  auto CalleeArgIter = CB.arg_begin();
4739  auto CalleeArgEnd = CB.arg_end();
4740  Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4741 
4742  for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4743  const Value* CalleeArg = *CalleeArgIter;
4744  const Value* CallerArg = &(*CallerArgIter);
4745  if (CalleeArg == CallerArg)
4746  continue;
4747 
4748  // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4749  // tail call @callee([4 x i64] undef, [4 x i64] %b)
4750  // }
4751  // 1st argument of callee is undef and has the same type as caller.
4752  if (CalleeArg->getType() == CallerArg->getType() &&
4753  isa<UndefValue>(CalleeArg))
4754  continue;
4755 
4756  return false;
4757  }
4758 
4759  return true;
4760 }
4761 
4762 // Returns true if TCO is possible between the callers and callees
4763 // calling conventions.
4764 static bool
4765 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
4766  CallingConv::ID CalleeCC) {
4767  // Tail calls are possible with fastcc and ccc.
4768  auto isTailCallableCC = [] (CallingConv::ID CC){
4769  return CC == CallingConv::C || CC == CallingConv::Fast;
4770  };
4771  if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4772  return false;
4773 
4774  // We can safely tail call both fastcc and ccc callees from a c calling
4775  // convention caller. If the caller is fastcc, we may have less stack space
4776  // than a non-fastcc caller with the same signature so disable tail-calls in
4777  // that case.
4778  return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4779 }
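// For illustration, the check above reduces to: a C caller may tail-call a C
// or fastcc callee; a fastcc caller may tail-call only a fastcc callee (it may
// have reserved less stack than a C caller of the same signature); any other
// pairing is rejected.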
4780 
4781 bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4782  SDValue Callee, CallingConv::ID CalleeCC, const CallBase *CB, bool isVarArg,
4783  const SmallVectorImpl<ISD::OutputArg> &Outs,
4784  const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4785  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4786 
4787  if (DisableSCO && !TailCallOpt) return false;
4788 
4789  // Variadic argument functions are not supported.
4790  if (isVarArg) return false;
4791 
4792  auto &Caller = DAG.getMachineFunction().getFunction();
4793  // Check that the calling conventions are compatible for tco.
4794  if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC))
4795  return false;
4796 
4797  // A caller with any byval parameters is not supported.
4798  if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4799  return false;
4800 
4801  // A callee with any byval parameters is not supported either.
4802  // Note: This is a quick workaround, because in some cases, e.g.
4803  // caller's stack size > callee's stack size, we are still able to apply
4804  // sibling call optimization. For example, gcc is able to do SCO for caller1
4805  // in the following example, but not for caller2.
4806  // struct test {
4807  // long int a;
4808  // char ary[56];
4809  // } gTest;
4810  // __attribute__((noinline)) int callee(struct test v, struct test *b) {
4811  // b->a = v.a;
4812  // return 0;
4813  // }
4814  // void caller1(struct test a, struct test c, struct test *b) {
4815  // callee(gTest, b); }
4816  // void caller2(struct test *b) { callee(gTest, b); }
4817  if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4818  return false;
4819 
4820  // If callee and caller use different calling conventions, we cannot pass
4821  // parameters on stack since offsets for the parameter area may be different.
4822  if (Caller.getCallingConv() != CalleeCC &&
4823  needStackSlotPassParameters(Subtarget, Outs))
4824  return false;
4825 
4826  // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
4827  // the caller and callee share the same TOC for TCO/SCO. If the caller and
4828  // callee potentially have different TOC bases then we cannot tail call since
4829  // we need to restore the TOC pointer after the call.
4830  // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4831  // We cannot guarantee this for indirect calls or calls to external functions.
4832  // When PC-Relative addressing is used, the concept of the TOC is no longer
4833  // applicable so this check is not required.
4834  // Check first for indirect calls.
4835  if (!Subtarget.isUsingPCRelativeCalls() &&
4836  !isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee))
4837  return false;
4838 
4839  // Check if we share the TOC base.
4840  if (!Subtarget.isUsingPCRelativeCalls() &&
4841  !callsShareTOCBase(&Caller, Callee, getTargetMachine()))
4842  return false;
4843 
4844  // TCO allows altering callee ABI, so we don't have to check further.
4845  if (CalleeCC == CallingConv::Fast && TailCallOpt)
4846  return true;
4847 
4848  if (DisableSCO) return false;
4849 
4850  // If the callee uses the same argument list as the caller, we can apply SCO
4851  // in this case. If not, we need to check whether the callee needs
4852  // stack for passing arguments.
4853  // PC Relative tail calls may not have a CallBase.
4854  // If there is no CallBase we cannot verify if we have the same argument
4855  // list so assume that we don't have the same argument list.
4856  if (CB && !hasSameArgumentList(&Caller, *CB) &&
4857  needStackSlotPassParameters(Subtarget, Outs))
4858  return false;
4859  else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
4860  return false;
4861 
4862  return true;
4863 }
4864 
4865 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
4866 /// for tail call optimization. Targets which want to do tail call
4867 /// optimization should implement this function.
4868 bool
4869 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4870  CallingConv::ID CalleeCC,
4871  bool isVarArg,
4872  const SmallVectorImpl<ISD::InputArg> &Ins,
4873  SelectionDAG& DAG) const {
4874  if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4875  return false;
4876 
4877  // Variable argument functions are not supported.
4878  if (isVarArg)
4879  return false;
4880 
4881  MachineFunction &MF = DAG.getMachineFunction();
4882  CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
4883  if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
4884  // Functions containing by val parameters are not supported.
4885  for (unsigned i = 0; i != Ins.size(); i++) {
4886  ISD::ArgFlagsTy Flags = Ins[i].Flags;
4887  if (Flags.isByVal()) return false;
4888  }
4889 
4890  // Non-PIC/GOT tail calls are supported.
4891  if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4892  return true;
4893 
4894  // At the moment we can only do local tail calls (in same module, hidden
4895  // or protected) if we are generating PIC.
4896  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4897  return G->getGlobal()->hasHiddenVisibility()
4898  || G->getGlobal()->hasProtectedVisibility();
4899  }
4900 
4901  return false;
4902 }
4903 
4904 /// isBLACompatibleAddress - Return the immediate to use if the specified
4905 /// 32-bit value is representable in the immediate field of a BxA instruction.
4906 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
4907  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4908  if (!C) return nullptr;
4909 
4910  int Addr = C->getZExtValue();
4911  if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
4912  SignExtend32<26>(Addr) != Addr)
4913  return nullptr; // Top 6 bits have to be sext of immediate.
4914 
4915  return DAG
4916  .getConstant(
4917  (int)C->getZExtValue() >> 2, SDLoc(Op),
4918  DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
4919  .getNode();
4920 }
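// For illustration: the callee address must be a 4-byte-aligned constant that
// survives sign extension from 26 bits (the absolute branch displacement field
// is 24 bits, scaled by 4). For example, 0x01FFFFFC qualifies and is returned
// as the immediate 0x007FFFFF after the shift, while 0x02000000 is rejected.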
4921 
4922 namespace {
4923 
4924 struct TailCallArgumentInfo {
4925  SDValue Arg;
4926  SDValue FrameIdxOp;
4927  int FrameIdx = 0;
4928 
4929  TailCallArgumentInfo() = default;
4930 };
4931 
4932 } // end anonymous namespace
4933 
4934 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
4935 static void StoreTailCallArgumentsToStackSlot(
4936  SelectionDAG &DAG, SDValue Chain,
4937  const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
4938  SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
4939  for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
4940  SDValue Arg = TailCallArgs[i].Arg;
4941  SDValue FIN = TailCallArgs[i].FrameIdxOp;
4942  int FI = TailCallArgs[i].FrameIdx;
4943  // Store relative to framepointer.
4944  MemOpChains.push_back(DAG.getStore(
4945  Chain, dl, Arg, FIN,
4946  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4947  }
4948 }
4949 
4950 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
4951 /// the appropriate stack slot for the tail call optimized function call.
4952 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
4953  SDValue OldRetAddr, SDValue OldFP,
4954  int SPDiff, const SDLoc &dl) {
4955  if (SPDiff) {
4956  // Calculate the new stack slot for the return address.
4957  MachineFunction &MF = DAG.getMachineFunction();
4958  const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
4959  const PPCFrameLowering *FL = Subtarget.getFrameLowering();
4960  bool isPPC64 = Subtarget.isPPC64();
4961  int SlotSize = isPPC64 ? 8 : 4;
4962  int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
4963  int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
4964  NewRetAddrLoc, true);
4965  EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4966  SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
4967  Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
4968  MachinePointerInfo::getFixedStack(MF, NewRetAddr));
4969  }
4970  return Chain;
4971 }
4972 
4973 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
4974 /// the position of the argument.
4975 static void
4976 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
4977  SDValue Arg, int SPDiff, unsigned ArgOffset,
4978  SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
4979  int Offset = ArgOffset + SPDiff;
4980  uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
4981  int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4982  EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4983  SDValue FIN = DAG.getFrameIndex(FI, VT);
4984  TailCallArgumentInfo Info;
4985  Info.Arg = Arg;
4986  Info.FrameIdxOp = FIN;
4987  Info.FrameIdx = FI;
4988  TailCallArguments.push_back(Info);
4989 }
4990 
4991 /// EmitTailCallLoadFPAndRetAddr - Emit load from frame pointer and return address
4992 /// stack slot. Returns the chain as result and the loaded frame pointers in
4993 /// LROpOut/FPOpout. Used when tail calling.
4994 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
4995  SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
4996  SDValue &FPOpOut, const SDLoc &dl) const {
4997  if (SPDiff) {
4998  // Load the LR and FP stack slot for later adjusting.
4999  EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5000  LROpOut = getReturnAddrFrameIndex(DAG);
5001  LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
5002  Chain = SDValue(LROpOut.getNode(), 1);
5003  }
5004  return Chain;
5005 }
5006 
5007 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5008 /// by "Src" to address "Dst" of size "Size". Alignment information is
5009 /// specified by the specific parameter attribute. The copy will be passed as
5010 /// a byval function parameter.
5011 /// Sometimes what we are copying is the end of a larger object, the part that
5012 /// does not fit in registers.
5013 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
5014  SDValue Chain, ISD::ArgFlagsTy Flags,
5015  SelectionDAG &DAG, const SDLoc &dl) {
5016  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5017  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
5018  Flags.getNonZeroByValAlign(), false, false, false,
5019  MachinePointerInfo(), MachinePointerInfo());
5020 }
5021 
5022 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5023 /// tail calls.
5024 static void LowerMemOpCallTo(
5025  SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5026  SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5027  bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5028  SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5029  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5030  if (!isTailCall) {
5031  if (isVector) {
5032  SDValue StackPtr;
5033  if (isPPC64)
5034  StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5035  else
5036  StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5037  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5038  DAG.getConstant(ArgOffset, dl, PtrVT));
5039  }
5040  MemOpChains.push_back(
5041  DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5042  // Calculate and remember argument location.
5043  } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5044  TailCallArguments);
5045 }
5046 
5047 static void
5048 PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
5049  const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5050  SDValue FPOp,
5051  SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5052  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5053  // might overwrite each other in case of tail call optimization.
5054  SmallVector<SDValue, 8> MemOpChains2;
5055  // Do not flag preceding copytoreg stuff together with the following stuff.
5056  InFlag = SDValue();
5057  StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5058  MemOpChains2, dl);
5059  if (!MemOpChains2.empty())
5060  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5061 
5062  // Store the return address to the appropriate stack slot.
5063  Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5064 
5065  // Emit callseq_end just before tailcall node.
5066  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5067  DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
5068  InFlag = Chain.getValue(1);
5069 }
5070 
5071 // Is this global address that of a function that can be called by name? (as
5072 // opposed to something that must hold a descriptor for an indirect call).
5073 static bool isFunctionGlobalAddress(SDValue Callee) {
5074  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
5075  if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
5076  Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
5077  return false;
5078 
5079  return G->getGlobal()->getValueType()->isFunctionTy();
5080  }
5081 
5082  return false;
5083 }
5084 
5085 SDValue PPCTargetLowering::LowerCallResult(
5086  SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
5087  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5088  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5089  SmallVector<CCValAssign, 16> RVLocs;
5090  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5091  *DAG.getContext());
5092 
5093  CCRetInfo.AnalyzeCallResult(
5094  Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5095  ? RetCC_PPC_Cold
5096  : RetCC_PPC);
5097 
5098  // Copy all of the result registers out of their specified physreg.
5099  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5100  CCValAssign &VA = RVLocs[i];
5101  assert(VA.isRegLoc() && "Can only return in registers!");
5102 
5103  SDValue Val;
5104 
5105  if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5106  SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5107  InFlag);
5108  Chain = Lo.getValue(1);
5109  InFlag = Lo.getValue(2);
5110  VA = RVLocs[++i]; // skip ahead to next loc
5111  SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5112  InFlag);
5113  Chain = Hi.getValue(1);
5114  InFlag = Hi.getValue(2);
5115  if (!Subtarget.isLittleEndian())
5116  std::swap (Lo, Hi);
5117  Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5118  } else {
5119  Val = DAG.getCopyFromReg(Chain, dl,
5120  VA.getLocReg(), VA.getLocVT(), InFlag);
5121  Chain = Val.getValue(1);
5122  InFlag = Val.getValue(2);
5123  }
5124 
5125  switch (VA.getLocInfo()) {
5126  default: llvm_unreachable("Unknown loc info!");
5127  case CCValAssign::Full: break;
5128  case CCValAssign::AExt:
5129  Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5130  break;
5131  case CCValAssign::ZExt:
5132  Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5133  DAG.getValueType(VA.getValVT()));
5134  Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5135  break;
5136  case CCValAssign::SExt:
5137  Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5138  DAG.getValueType(VA.getValVT()));
5139  Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5140  break;
5141  }
5142 
5143  InVals.push_back(Val);
5144  }
5145 
5146  return Chain;
5147 }
5148 
5149 static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5150  const PPCSubtarget &Subtarget, bool isPatchPoint) {
5151  // PatchPoint calls are not indirect.
5152  if (isPatchPoint)
5153  return false;
5154 
5155  if (isFunctionGlobalAddress(Callee) || isa<ExternalSymbolSDNode>(Callee))
5156  return false;
5157 
5158  // Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot,
5159  // because the immediate function pointer points to a descriptor instead of
5160  // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5161  // pointer immediate points to the global entry point, while the BLA would
5162  // need to jump to the local entry point (see rL211174).
5163  if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5164  isBLACompatibleAddress(Callee, DAG))
5165  return false;
5166 
5167  return true;
5168 }
5169 
5170 // AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5171 static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5172  return Subtarget.isAIXABI() ||
5173  (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5174 }
5175 
5176 static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5177  const Function &Caller,
5178  const SDValue &Callee,
5179  const PPCSubtarget &Subtarget,
5180  const TargetMachine &TM) {
5181  if (CFlags.IsTailCall)
5182  return PPCISD::TC_RETURN;
5183 
5184  // This is a call through a function pointer.
5185  if (CFlags.IsIndirect) {
5186  // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
5187  // indirect calls. The save of the caller's TOC pointer to the stack will be
5188  // inserted into the DAG as part of call lowering. The restore of the TOC
5189  // pointer is modeled by using a pseudo instruction for the call opcode that
5190  // represents the 2 instruction sequence of an indirect branch and link,
5191  // immediately followed by a load of the TOC pointer from the stack save
5192  // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5193  // as it is not saved or used.
5194  return isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5195  : PPCISD::BCTRL;
5196  }
5197 
5198  if (Subtarget.isUsingPCRelativeCalls()) {
5199  assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5200  return PPCISD::CALL_NOTOC;
5201  }
5202 
5203  // The ABIs that maintain a TOC pointer across calls need to have a nop
5204  // immediately following the call instruction if the caller and callee may
5205  // have different TOC bases. At link time if the linker determines the calls
5206  // may not share a TOC base, the call is redirected to a trampoline inserted
5207  // by the linker. The trampoline will (among other things) save the caller's
5208  // TOC pointer at an ABI designated offset in the linkage area and the linker
5209  // will rewrite the nop to be a load of the TOC pointer from the linkage area
5210  // into gpr2.
5211  if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
5212  return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
5213  : PPCISD::CALL_NOP;
5214 
5215  return PPCISD::CALL;
5216 }
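// For illustration, the selection above collapses to: tail calls use
// TC_RETURN; indirect calls use BCTRL, or BCTRL_LOAD_TOC when a TOC restore is
// required (AIX, and 64-bit ELF without PC-relative addressing); PC-relative
// 64-bit ELF uses CALL_NOTOC; otherwise AIX and 64-bit ELF use CALL when the
// caller and callee provably share a TOC base and CALL_NOP when they might
// not; everything else is a plain CALL.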
5217 
5218 static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5219  const SDLoc &dl, const PPCSubtarget &Subtarget) {
5220  if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5221  if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5222  return SDValue(Dest, 0);
5223 
5224  // Returns true if the callee is local, and false otherwise.
5225  auto isLocalCallee = [&]() {
5226  const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5227  const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5228  const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5229 
5230  return DAG.getTarget().shouldAssumeDSOLocal(*Mod, GV) &&
5231  !dyn_cast_or_null<GlobalIFunc>(GV);
5232  };
5233 
5234  // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5235  // a static relocation model causes some versions of GNU LD (2.17.50, at
5236  // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5237  // built with secure-PLT.
5238  bool UsePlt =
5239  Subtarget.is32BitELFABI() && !isLocalCallee() &&
5240  Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5241 
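  // On AIX a direct call does not target the global symbol itself but the
  // function's entry-point symbol, i.e. the C-linkage name prefixed with a
  // '.'; the helper below wraps that MCSymbolXCOFF in an MCSymbol SDNode of
  // pointer type so it can be used as the callee operand.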
5242  const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5243  const TargetMachine &TM = Subtarget.getTargetMachine();
5244  const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5245  MCSymbolXCOFF *S =
5246  cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5247 
5248  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5249  return DAG.getMCSymbol(S, PtrVT);
5250  };
5251 
5252  if (isFunctionGlobalAddress(Callee)) {
5253  const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5254 
5255  if (Subtarget.isAIXABI()) {
5256  assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5257  return getAIXFuncEntryPointSymbolSDNode(GV);
5258  }
5259  return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5260  UsePlt ? PPCII::MO_PLT : 0);
5261  }
5262 
5263  if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5264  const char *SymName = S->getSymbol();
5265  if (Subtarget.isAIXABI()) {
5266  // If there exists a user-declared function whose name is the same as the
5267  // ExternalSymbol's, then we pick up the user-declared version.
5268  const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5269  if (const Function *F =
5270  dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5271  return getAIXFuncEntryPointSymbolSDNode(F);
5272 
5273  // On AIX, direct function calls reference the symbol for the function's
5274  // entry point, which is named by prepending a "." before the function's
5275  // C-linkage name. A Qualname is returned here because an external
5276  // function entry point is a csect with XTY_ER property.
5277  const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5278  auto &Context = DAG.getMachineFunction().getMMI().getContext();
5279  MCSectionXCOFF *Sec = Context.getXCOFFSection(
5280  (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
5281  XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER));
5282  return Sec->getQualNameSymbol();
5283  };
5284 
5285  SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5286  }
5287  return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5288  UsePlt ? PPCII::MO_PLT : 0);
5289  }
5290 
5291  // No transformation needed.
5292  assert(Callee.getNode() && "What no callee?");
5293  return Callee;
5294 }
5295 
5296 static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5297  assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5298  "Expected a CALLSEQ_STARTSDNode.");
5299 
5300  // The last operand is the chain, except when the node has glue. If the node
5301  // has glue, then the last operand is the glue, and the chain is the second
5302  // last operand.
5303  SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5304  if (LastValue.getValueType() != MVT::Glue)
5305  return LastValue;
5306 
5307  return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5308 }
5309 
5310 // Creates the node that moves a function's address into the count register
5311 // to prepare for an indirect call instruction.
5312 static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5313  SDValue &Glue, SDValue &Chain,
5314  const SDLoc &dl) {
5315  SDValue MTCTROps[] = {Chain, Callee, Glue};
5316  EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5317  Chain = DAG.getNode(PPCISD::MTCTR, dl, makeArrayRef(ReturnTypes, 2),
5318  makeArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5319  // The glue is the second value produced.
5320  Glue = Chain.getValue(1);
5321 }
5322 
5323 static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5324  SDValue &Glue, SDValue &Chain,
5325  SDValue CallSeqStart,
5326  const CallBase *CB, const SDLoc &dl,
5327  bool hasNest,
5328  const PPCSubtarget &Subtarget) {
5329  // Function pointers in the 64-bit SVR4 ABI do not point to the function
5330  // entry point, but to the function descriptor (the function entry point
5331  // address is part of the function descriptor though).
5332  // The function descriptor is a three doubleword structure with the
5333  // following fields: function entry point, TOC base address and
5334  // environment pointer.
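  // As an illustration, on the 64-bit descriptor ABIs the three doublewords
  // are laid out as: offset 0 = entry point, offset 8 = TOC anchor,
  // offset 16 = environment pointer. The offsets actually used below are
  // queried from the PPCSubtarget rather than hard-coded.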
5335  // Thus for a call through a function pointer, the following actions need
5336  // to be performed:
5337  // 1. Save the TOC of the caller in the TOC save area of its stack
5338  // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5339  // 2. Load the address of the function entry point from the function
5340  // descriptor.
5341  // 3. Load the TOC of the callee from the function descriptor into r2.
5342  // 4. Load the environment pointer from the function descriptor into
5343  // r11.
5344  // 5. Branch to the function entry point address.
5345  // 6. On return of the callee, the TOC of the caller needs to be
5346  // restored (this is done in FinishCall()).
5347  //
5348  // The loads are scheduled at the beginning of the call sequence, and the
5349  // register copies are flagged together to ensure that no other
5350  // operations can be scheduled in between. E.g. without flagging the
5351  // copies together, a TOC access in the caller could be scheduled between
5352  // the assignment of the callee TOC and the branch to the callee, which leads
5353  // to incorrect code.
5354 
5355  // Start by loading the function address from the descriptor.
5356  SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5357  auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5358  ? (MachineMemOperand::MODereferenceable |
5359  MachineMemOperand::MOInvariant)
5360  : MachineMemOperand::MONone;
5361 
5362  MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5363 
5364  // Registers used in building the DAG.
5365  const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5366  const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5367 
5368  // Offsets of descriptor members.
5369  const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5370  const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5371 
5372  const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5373  const unsigned Alignment = Subtarget.isPPC64() ? 8 : 4;
5374 
5375  // One load for the function's entry point address.
5376  SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5377  Alignment, MMOFlags);
5378 
5379  // One for loading the TOC anchor for the module that contains the called
5380  // function.
5381  SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5382  SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5383  SDValue TOCPtr =
5384  DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5385  MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5386 
5387  // One for loading the environment pointer.
5388  SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5389  SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5390  SDValue LoadEnvPtr =
5391  DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5392  MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5393 
5394 
5395  // Then copy the newly loaded TOC anchor to the TOC pointer.
5396  SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5397  Chain = TOCVal.getValue(0);
5398  Glue = TOCVal.getValue(1);
5399 
5400  // If the function call has an explicit 'nest' parameter, it takes the
5401  // place of the environment pointer.
5402  assert((!hasNest || !Subtarget.isAIXABI()) &&
5403  "Nest parameter is not supported on AIX.");
5404  if (!hasNest) {
5405  SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5406  Chain = EnvVal.getValue(0);
5407  Glue = EnvVal.getValue(1);
5408  }
5409 
5410  // The rest of the indirect call sequence is the same as the non-descriptor
5411  // DAG.
5412  prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5413 }
5414 
5415 static void
5416 buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5417  PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5418  SelectionDAG &DAG,
5419  SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5420  SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5421  const PPCSubtarget &Subtarget) {
5422  const bool IsPPC64 = Subtarget.isPPC64();
5423  // MVT for a general purpose register.
5424  const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
5425 
5426  // First operand is always the chain.
5427  Ops.push_back(Chain);
5428 
5429  // If it's a direct call pass the callee as the second operand.
5430  if (!CFlags.IsIndirect)
5431  Ops.push_back(Callee);
5432  else {
5433  assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5434 
5435  // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5436  // on the stack (this would have been done in `LowerCall_64SVR4` or
5437  // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5438  // represents both the indirect branch and a load that restores the TOC
5439  // pointer from the linkage area. The operand for the TOC restore is an add
5440  // of the TOC save offset to the stack pointer. This must be the second
5441  // operand: after the chain input but before any other variadic arguments.
5442  // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5443  // saved or used.
5444  if (isTOCSaveRestoreRequired(Subtarget)) {
5445  const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5446 
5447  SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5448  unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5449  SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5450  SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5451  Ops.push_back(AddTOC);
5452  }
5453 
5454  // Add the register used for the environment pointer.
5455  if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5456  Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5457  RegVT));
5458 
5459 
5460  // Add CTR register as callee so a bctr can be emitted later.
5461  if (CFlags.IsTailCall)
5462  Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5463  }
5464 
5465  // If this is a tail call add stack pointer delta.
5466  if (CFlags.IsTailCall)
5467  Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5468 
5469  // Add argument registers to the end of the list so that they are known live
5470  // into the call.
5471  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5472  Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5473  RegsToPass[i].second.getValueType()));
5474 
5475  // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5476  // no way to mark dependencies as implicit here.
5477  // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5478  if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5479  !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5480  Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5481 
5482  // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5483  if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5484  Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5485 
5486  // Add a register mask operand representing the call-preserved registers.
5487  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5488  const uint32_t *Mask =
5489  TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5490  assert(Mask && "Missing call preserved mask for calling convention");
5491  Ops.push_back(DAG.getRegisterMask(Mask));
5492 
5493  // If the glue is valid, it is the last operand.
5494  if (Glue.getNode())
5495  Ops.push_back(Glue);
5496 }
5497 
5498 SDValue PPCTargetLowering::FinishCall(
5499  CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5500  SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5501  SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5502  unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5503  SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5504 
5505  if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5506  Subtarget.isAIXABI())
5507  setUsesTOCBasePtr(DAG);
5508 
5509  unsigned CallOpc =
5510  getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5511  Subtarget, DAG.getTarget());
5512 
5513  if (!CFlags.IsIndirect)
5514  Callee = transformCallee(Callee, DAG, dl, Subtarget);
5515  else if (Subtarget.usesFunctionDescriptors())
5516  prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5517  dl, CFlags.HasNest, Subtarget);
5518  else
5519  prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5520 
5521  // Build the operand list for the call instruction.
5522  SmallVector<SDValue, 8> Ops;
5523  buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5524  SPDiff, Subtarget);
5525 
5526  // Emit tail call.
5527  if (CFlags.IsTailCall) {
5528  // Indirect tail calls when using PC Relative calls do not have the same
5529  // constraints.
5530  assert(((Callee.getOpcode() == ISD::Register &&
5531  cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5532  Callee.getOpcode() == ISD::TargetExternalSymbol ||
5533  Callee.getOpcode() == ISD::TargetGlobalAddress ||
5534  isa<ConstantSDNode>(Callee) ||
5535  (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5536  "Expecting a global address, external symbol, absolute value, "
5537  "register or an indirect tail call when PC Relative calls are "
5538  "used.");
5539  // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5540  assert(CallOpc == PPCISD::TC_RETURN &&
5541  "Unexpected call opcode for a tail call.");
5542  DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5543  return DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5544  }
5545 
5546  std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5547  Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5548  DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5549  Glue = Chain.getValue(1);
5550 
5551  // When performing tail call optimization the callee pops its arguments off
5552  // the stack. Account for this here so these bytes can be pushed back on in
5553  // PPCFrameLowering::eliminateCallFramePseudoInstr.
5554  int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5555  getTargetMachine().Options.GuaranteedTailCallOpt)
5556  ? NumBytes
5557  : 0;
5558 
5559  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5560  DAG.getIntPtrConstant(BytesCalleePops, dl, true),
5561  Glue, dl);
5562  Glue = Chain.getValue(1);
5563 
5564  return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5565  DAG, InVals);
5566 }
5567 
5568 SDValue
5569 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5570  SmallVectorImpl<SDValue> &InVals) const {
5571  SelectionDAG &DAG = CLI.DAG;
5572  SDLoc &dl = CLI.DL;
5573  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5574  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5575  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5576  SDValue Chain = CLI.Chain;
5577  SDValue Callee = CLI.Callee;
5578  bool &isTailCall = CLI.IsTailCall;
5579  CallingConv::ID CallConv = CLI.CallConv;
5580  bool isVarArg = CLI.IsVarArg;
5581  bool isPatchPoint = CLI.IsPatchPoint;
5582  const CallBase *CB = CLI.CB;
5583 
5584  if (isTailCall) {
5585  if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5586  isTailCall = false;
5587  else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5588  isTailCall = IsEligibleForTailCallOptimization_64SVR4(
5589  Callee, CallConv, CB, isVarArg, Outs, Ins, DAG);
5590  else
5591  isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
5592  Ins, DAG);
5593  if (isTailCall) {
5594  ++NumTailCalls;
5595  if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5596  ++NumSiblingCalls;
5597 
5598  // PC Relative calls no longer guarantee that the callee is a Global
5599  // Address Node. The callee could be an indirect tail call in which
5600  // case the SDValue for the callee could be a load (to load the address
5601  // of a function pointer) or it may be a register copy (to move the
5602  // address of the callee from a function parameter into a virtual
5603  // register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
5604  assert((Subtarget.isUsingPCRelativeCalls() ||
5605  isa<GlobalAddressSDNode>(Callee)) &&
5606  "Callee should be an llvm::Function object.");
5607 
5608  LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5609  << "\nTCO callee: ");
5610  LLVM_DEBUG(Callee.dump());
5611  }
5612  }
5613 
5614  if (!isTailCall && CB && CB->isMustTailCall())
5615  report_fatal_error("failed to perform tail call elimination on a call "
5616  "site marked musttail");
5617 
5618  // When long calls (i.e. indirect calls) are always used, calls are always
5619  // made via function pointer. If we have a function name, first translate it
5620  // into a pointer.
5621  if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5622  !isTailCall)
5623  Callee = LowerGlobalAddress(Callee, DAG);
5624 
5625  CallFlags CFlags(
5626  CallConv, isTailCall, isVarArg, isPatchPoint,
5627  isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5628  // hasNest
5629  Subtarget.is64BitELFABI() &&
5630  any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5631  CLI.NoMerge);
5632 
5633  if (Subtarget.isAIXABI())
5634  return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5635  InVals, CB);
5636 
5637  assert(Subtarget.isSVR4ABI());
5638  if (Subtarget.isPPC64())
5639  return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5640  InVals, CB);
5641  return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5642  InVals, CB);
5643 }
5644 
5645 SDValue PPCTargetLowering::LowerCall_32SVR4(
5646  SDValue Chain, SDValue Callee, CallFlags CFlags,
5647  const SmallVectorImpl<ISD::OutputArg> &Outs,
5648  const SmallVectorImpl<SDValue> &OutVals,
5649  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5650  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5651  const CallBase *CB) const {
5652  // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5653  // of the 32-bit SVR4 ABI stack frame layout.
5654 
5655  const CallingConv::ID CallConv = CFlags.CallConv;
5656  const bool IsVarArg = CFlags.IsVarArg;
5657  const bool IsTailCall = CFlags.IsTailCall;
5658 
5659  assert((CallConv == CallingConv::C ||
5660  CallConv == CallingConv::Cold ||
5661  CallConv == CallingConv::Fast) && "Unknown calling convention!");
5662 
5663  const Align PtrAlign(4);
5664 
5665  MachineFunction &MF = DAG.getMachineFunction();
5666 
5667  // Mark this function as potentially containing a tail call. As a
5668  // consequence the frame pointer will be used for dynamic allocas and for
5669  // restoring the caller's stack pointer in this function's epilogue. This is
5670  // done because the tail-called function might overwrite the value in this
5671  // function's (MF) stack pointer stack slot 0(SP).
5672  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5673  CallConv == CallingConv::Fast)
5674  MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5675 
5676  // Count how many bytes are to be pushed on the stack, including the linkage
5677  // area, parameter list area and the part of the local variable space which
5678  // contains copies of aggregates which are passed by value.
5679 
5680  // Assign locations to all of the outgoing arguments.
5681  SmallVector<CCValAssign, 16> ArgLocs;
5682  PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5683 
5684  // Reserve space for the linkage area on the stack.
5685  CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5686  PtrAlign);
5687  if (useSoftFloat())
5688  CCInfo.PreAnalyzeCallOperands(Outs);
5689 
5690  if (IsVarArg) {
5691  // Handle fixed and variable vector arguments differently.
5692  // Fixed vector arguments go into registers as long as registers are
5693  // available. Variable vector arguments always go into memory.
5694  unsigned NumArgs = Outs.size();
5695 
5696  for (unsigned i = 0; i != NumArgs; ++i) {
5697  MVT ArgVT = Outs[i].VT;
5698  ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5699  bool Result;
5700 
5701  if (Outs[i].IsFixed) {
5702  Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5703  CCInfo);
5704  } else {
5705  Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
5706  ArgFlags, CCInfo);
5707  }
5708 
5709  if (Result) {
5710 #ifndef NDEBUG
5711  errs() << "Call operand #" << i << " has unhandled type "
5712  << EVT(ArgVT).getEVTString() << "\n";
5713 #endif
5714  llvm_unreachable(nullptr);
5715  }
5716  }
5717  } else {
5718  // All arguments are treated the same.
5719  CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5720  }
5721  CCInfo.clearWasPPCF128();
5722 
5723  // Assign locations to all of the outgoing aggregate by value arguments.
5724  SmallVector<CCValAssign, 16> ByValArgLocs;
5725  CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
5726 
5727  // Reserve stack space for the allocations in CCInfo.
5728  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
5729 
5730  CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5731 
5732  // Size of the linkage area, parameter list area and the part of the local
5733  // variable space where copies of aggregates which are passed by value are
5734  // stored.
5735  unsigned NumBytes = CCByValInfo.getNextStackOffset();
5736 
5737  // Calculate by how many bytes the stack has to be adjusted in case of tail
5738  // call optimization.
5739  int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
5740 
5741  // Adjust the stack pointer for the new arguments...
5742  // These operations are automatically eliminated by the prolog/epilog pass
5743  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5744  SDValue CallSeqStart = Chain;
5745 
5746  // Load the return address and frame pointer so it can be moved somewhere else
5747  // later.
5748  SDValue LROp, FPOp;
5749  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5750 
5751  // Set up a copy of the stack pointer for use loading and storing any
5752  // arguments that may not fit in the registers available for argument
5753  // passing.
5754  SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5755 
5756  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5757  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5758  SmallVector<SDValue, 8> MemOpChains;
5759 
5760  bool seenFloatArg = false;
5761  // Walk the register/memloc assignments, inserting copies/loads.
5762  // i - Tracks the index into the list of registers allocated for the call
5763  // RealArgIdx - Tracks the index into the list of actual function arguments
5764  // j - Tracks the index into the list of byval arguments
5765  for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
5766  i != e;
5767  ++i, ++RealArgIdx) {
5768  CCValAssign &VA = ArgLocs[i];
5769  SDValue Arg = OutVals[RealArgIdx];
5770  ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
5771 
5772  if (Flags.isByVal()) {
5773  // Argument is an aggregate which is passed by value, thus we need to
5774  // create a copy of it in the local variable space of the current stack
5775  // frame (which is the stack frame of the caller) and pass the address of
5776  // this copy to the callee.
5777  assert((j < ByValArgLocs.size()) && "Index out of bounds!");
5778  CCValAssign &ByValVA = ByValArgLocs[j++];
5779  assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
5780 
5781  // Memory reserved in the local variable space of the caller's stack frame.
5782  unsigned LocMemOffset = ByValVA.getLocMemOffset();
5783 
5784  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5785  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5786  StackPtr, PtrOff);
5787 
5788  // Create a copy of the argument in the local area of the current
5789  // stack frame.
5790  SDValue MemcpyCall =
5791  CreateCopyOfByValArgument(Arg, PtrOff,
5792  CallSeqStart.getNode()->getOperand(0),
5793  Flags, DAG, dl);
5794 
5795  // This must go outside the CALLSEQ_START..END.
5796  SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
5797  SDLoc(MemcpyCall));
5798  DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5799  NewCallSeqStart.getNode());
5800  Chain = CallSeqStart = NewCallSeqStart;
5801 
5802  // Pass the address of the aggregate copy on the stack either in a
5803  // physical register or in the parameter list area of the current stack
5804  // frame to the callee.
5805  Arg = PtrOff;
5806  }
5807 
5808  // When useCRBits() is true, there can be i1 arguments.
5809  // It is because getRegisterType(MVT::i1) => MVT::i1,
5810  // and for other integer types getRegisterType() => MVT::i32.
5811  // Extend i1 and ensure callee will get i32.
5812  if (Arg.getValueType() == MVT::i1)
5813  Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
5814  dl, MVT::i32, Arg);
5815 
5816  if (VA.isRegLoc()) {
5817  seenFloatArg |= VA.getLocVT().isFloatingPoint();
5818  // Put argument in a physical register.
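  // With SPE, an f64 argument is split into its two 32-bit halves using
  // PPCISD::EXTRACT_SPE and passed in a pair of consecutive GPRs; which half
  // goes in the first register depends on the target endianness.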
5819  if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
5820  bool IsLE = Subtarget.isLittleEndian();
5821  SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5822  DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
5823  RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
5824  SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5825  DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
5826  RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
5827  SVal.getValue(0)));
5828  } else
5829  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
5830  } else {
5831  // Put argument in the parameter list area of the current stack frame.
5832  assert(VA.isMemLoc());
5833  unsigned LocMemOffset = VA.getLocMemOffset();
5834 
5835  if (!IsTailCall) {
5836  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5837  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5838  StackPtr, PtrOff);
5839 
5840  MemOpChains.push_back(
5841  DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5842  } else {
5843  // Calculate and remember argument location.
5844  CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
5845  TailCallArguments);
5846  }
5847  }
5848  }
5849 
5850  if (!MemOpChains.empty())
5851  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5852 
5853  // Build a sequence of copy-to-reg nodes chained together with token chain
5854  // and flag operands which copy the outgoing args into the appropriate regs.
5855  SDValue InFlag;
5856  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5857  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5858  RegsToPass[i].second, InFlag);
5859  InFlag = Chain.getValue(1);
5860  }
5861 
5862  // Set CR bit 6 to true if this is a vararg call with floating args passed in
5863  // registers.
5864  if (IsVarArg) {
5865  SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
5866  SDValue Ops[] = { Chain, InFlag };
5867 
5868  Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
5869  dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
5870 
5871  InFlag = Chain.getValue(1);
5872  }
5873 
5874  if (IsTailCall)
5875  PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5876  TailCallArguments);
5877 
5878  return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
5879  Callee, SPDiff, NumBytes, Ins, InVals, CB);
5880 }
5881 
5882 // Copy an argument into memory, being careful to do this outside the
5883 // call sequence for the call to which the argument belongs.
5884 SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
5885  SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
5886  SelectionDAG &DAG, const SDLoc &dl) const {
5887  SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
5888  CallSeqStart.getNode()->getOperand(0),
5889  Flags, DAG, dl);
5890  // The MEMCPY must go outside the CALLSEQ_START..END.
5891  int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
5892  SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
5893  SDLoc(MemcpyCall));
5894  DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5895  NewCallSeqStart.getNode());
5896  return NewCallSeqStart;
5897 }
5898 
5899 SDValue PPCTargetLowering::LowerCall_64SVR4(
5900  SDValue Chain, SDValue Callee, CallFlags CFlags,
5901  const SmallVectorImpl<ISD::OutputArg> &Outs,
5902  const SmallVectorImpl<SDValue> &OutVals,
5903  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5904  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5905  const CallBase *CB) const {
5906  bool isELFv2ABI = Subtarget.isELFv2ABI();
5907  bool isLittleEndian = Subtarget.isLittleEndian();
5908  unsigned NumOps = Outs.size();
5909  bool IsSibCall = false;
5910  bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
5911 
5912  EVT PtrVT = getPointerTy(DAG.getDataLayout());
5913  unsigned PtrByteSize = 8;
5914 
5915  MachineFunction &MF = DAG.getMachineFunction();
5916 
5917  if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
5918  IsSibCall = true;
5919 
5920  // Mark this function as potentially containing a tail call. As a
5921  // consequence the frame pointer will be used for dynamic allocas and for
5922  // restoring the caller's stack pointer in this function's epilogue. This is
5923  // done because the tail-called function might overwrite the value in this
5924  // function's (MF) stack pointer stack slot 0(SP).
5925  if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
5926  MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5927 
5928  assert(!(IsFastCall && CFlags.IsVarArg) &&
5929  "fastcc not supported on varargs functions");
5930 
5931  // Count how many bytes are to be pushed on the stack, including the linkage
5932  // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
5933  // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
5934  // area is 32 bytes reserved space for [SP][CR][LR][TOC].
5935  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5936  unsigned NumBytes = LinkageSize;
5937  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
5938 
5939  static const MCPhysReg GPR[] = {
5940  PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5941  PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5942  };
5943  static const MCPhysReg VR[] = {
5944  PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5945  PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5946  };
5947 
5948  const unsigned NumGPRs = array_lengthof(GPR);
5949  const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
5950  const unsigned NumVRs = array_lengthof(VR);
5951 
5952  // On ELFv2, we can avoid allocating the parameter area if all the arguments
5953  // can be passed to the callee in registers.
5954  // For the fast calling convention, there is another check below.
5955  // Note: We should keep consistent with LowerFormalArguments_64SVR4()
5956  bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
5957  if (!HasParameterArea) {
5958  unsigned ParamAreaSize = NumGPRs * PtrByteSize;
5959  unsigned AvailableFPRs = NumFPRs;
5960  unsigned AvailableVRs = NumVRs;
5961  unsigned NumBytesTmp = NumBytes;
5962  for (unsigned i = 0; i != NumOps; ++i) {
5963  if (Outs[i].Flags.isNest()) continue;
5964  if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
5965  PtrByteSize, LinkageSize, ParamAreaSize,
5966  NumBytesTmp, AvailableFPRs, AvailableVRs))
5967  HasParameterArea = true;
5968  }
5969  }
5970 
5971  // When using the fast calling convention, we don't provide backing for
5972  // arguments that will be in registers.
5973  unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
5974 
5975  // Avoid allocating parameter area for fastcc functions if all the arguments
5976  // can be passed in the registers.
5977  if (IsFastCall)
5978  HasParameterArea = false;
5979 
5980  // Add up all the space actually used.
5981  for (unsigned i = 0; i != NumOps; ++i) {
5982  ISD::ArgFlagsTy Flags = Outs[i].Flags;
5983  EVT ArgVT = Outs[i].VT;
5984  EVT OrigVT = Outs[i].ArgVT;
5985 
5986  if (Flags.isNest())
5987  continue;
5988 
5989  if (IsFastCall) {
5990  if (Flags.isByVal()) {
5991  NumGPRsUsed += (Flags.getByValSize()+7)/8;
5992  if (NumGPRsUsed > NumGPRs)
5993  HasParameterArea = true;
5994  } else {
5995  switch (ArgVT.getSimpleVT().SimpleTy) {
5996  default: llvm_unreachable("Unexpected ValueType for argument!");
5997  case MVT::i1:
5998  case MVT::i32:
5999  case MVT::i64:
6000  if (++NumGPRsUsed <= NumGPRs)
6001  continue;
6002  break;
6003  case MVT::v4i32:
6004  case MVT::v8i16:
6005  case MVT::v16i8:
6006  case MVT::v2f64:
6007  case MVT::v2i64:
6008  case MVT::v1i128:
6009  case MVT::f128:
6010  if (++NumVRsUsed <= NumVRs)
6011  continue;
6012  break;
6013  case MVT::v4f32:
6014  if (++NumVRsUsed <= NumVRs)
6015  continue;
6016  break;
6017  case MVT::f32:
6018  case MVT::f64:
6019  if (++NumFPRsUsed <= NumFPRs)
6020  continue;
6021  break;
6022  }
6023  HasParameterArea = true;
6024  }
6025  }
6026 
6027  /* Respect alignment of argument on the stack. */
6028  auto Alignment =
6029  CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6030  NumBytes = alignTo(NumBytes, Alignment);
6031 
6032  NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6033  if (Flags.isInConsecutiveRegsLast())
6034  NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6035  }
6036 
6037  unsigned NumBytesActuallyUsed = NumBytes;
6038 
6039  // In the old ELFv1 ABI,
6040  // the prolog code of the callee may store up to 8 GPR argument registers to
6041  // the stack, allowing va_start to index over them in memory if it is varargs.
6042  // Because we cannot tell if this is needed on the caller side, we have to
6043  // conservatively assume that it is needed. As such, make sure we have at
6044  // least enough stack space for the caller to store the 8 GPRs.
6045  // In the ELFv2 ABI, we allocate the parameter area iff a callee
6046  // really requires memory operands, e.g. a vararg function.
6047  if (HasParameterArea)
6048  NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6049  else
6050  NumBytes = LinkageSize;
6051 
6052  // Tail call needs the stack to be aligned.
6053  if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6054  NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6055 
6056  int SPDiff = 0;
6057 
6058  // Calculate by how many bytes the stack has to be adjusted in case of tail
6059  // call optimization.
6060  if (!IsSibCall)
6061  SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6062 
6063  // To protect arguments on the stack from being clobbered in a tail call,
6064  // force all the loads to happen before doing any other lowering.
6065  if (CFlags.IsTailCall)
6066  Chain = DAG.getStackArgumentTokenFactor(Chain);
6067 
6068  // Adjust the stack pointer for the new arguments...
6069  // These operations are automatically eliminated by the prolog/epilog pass
6070  if (!IsSibCall)
6071  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6072  SDValue CallSeqStart = Chain;
6073 
6074  // Load the return address and frame pointer so it can be moved somewhere else
6075  // later.
6076  SDValue LROp, FPOp;
6077  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6078 
6079  // Set up a copy of the stack pointer for use loading and storing any
6080  // arguments that may not fit in the registers available for argument
6081  // passing.
6082  SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6083 
6084  // Figure out which arguments are going to go in registers, and which in
6085  // memory. Also, if this is a vararg function, floating point operations
6086  // must be stored to our stack, and loaded into integer regs as well, if
6087  // any integer regs are available for argument passing.
6088  unsigned ArgOffset = LinkageSize;
6089 
6091  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6092 
6093  SmallVector<SDValue, 8> MemOpChains;
6094  for (unsigned i = 0; i != NumOps; ++i) {
6095  SDValue Arg = OutVals[i];
6096  ISD::ArgFlagsTy Flags = Outs[i].Flags;
6097  EVT ArgVT = Outs[i].VT;
6098  EVT OrigVT = Outs[i].ArgVT;
6099 
6100  // PtrOff will be used to store the current argument to the stack if a
6101  // register cannot be found for it.
6102  SDValue PtrOff;
6103 
6104  // We re-align the argument offset for each argument, except when using the
6105  // fast calling convention, when we need to make sure we do that only when
6106  // we'll actually use a stack slot.
6107  auto ComputePtrOff = [&]() {
6108  /* Respect alignment of argument on the stack. */
6109  auto Alignment =
6110  CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6111  ArgOffset = alignTo(ArgOffset, Alignment);
6112 
6113  PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6114 
6115  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6116  };
6117 
6118  if (!IsFastCall) {
6119  ComputePtrOff();
6120 
6121  /* Compute GPR index associated with argument offset. */
6122  GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6123  GPR_idx = std::min(GPR_idx, NumGPRs);
6124  }
6125 
6126  // Promote integers to 64-bit values.
6127  if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6128  // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6129  unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6130  Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6131  }
6132 
6133  // FIXME memcpy is used way more than necessary. Correctness first.
6134  // Note: "by value" is code for passing a structure by value, not
6135  // basic types.
6136  if (Flags.isByVal()) {
6137  // Note: Size includes alignment padding, so
6138  // struct x { short a; char b; }
6139  // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6140  // These are the proper values we need for right-justifying the
6141  // aggregate in a parameter register.
6142  unsigned Size = Flags.getByValSize();
6143 
6144  // An empty aggregate parameter takes up no storage and no
6145  // registers.
6146  if (Size == 0)
6147  continue;
6148 
6149  if (IsFastCall)
6150  ComputePtrOff();
6151 
6152  // All aggregates smaller than 8 bytes must be passed right-justified.
6153  if (Size==1 || Size==2 || Size==4) {
6154  EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6155  if (GPR_idx != NumGPRs) {
6156  SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6157  MachinePointerInfo(), VT);
6158  MemOpChains.push_back(Load.getValue(1));
6159  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6160 
6161  ArgOffset += PtrByteSize;
6162  continue;
6163  }
6164  }
6165 
6166  if (GPR_idx == NumGPRs && Size < 8) {
6167  SDValue AddPtr = PtrOff;
6168  if (!isLittleEndian) {
6169  SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6170  PtrOff.getValueType());
6171  AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6172  }
6173  Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6174  CallSeqStart,
6175  Flags, DAG, dl);
6176  ArgOffset += PtrByteSize;
6177  continue;
6178  }
6179  // Copy entire object into memory. There are cases where gcc-generated
6180  // code assumes it is there, even if it could be put entirely into
6181  // registers. (This is not what the doc says.)
6182 
6183  // FIXME: The above statement is likely due to a misunderstanding of the
6184  // documents. All arguments must be copied into the parameter area BY
6185  // THE CALLEE in the event that the callee takes the address of any
6186  // formal argument. That has not yet been implemented. However, it is
6187  // reasonable to use the stack area as a staging area for the register
6188  // load.
6189 
6190  // Skip this for small aggregates, as we will use the same slot for a
6191  // right-justified copy, below.
6192  if (Size >= 8)
6193  Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6194  CallSeqStart,
6195  Flags, DAG, dl);
6196 
6197  // When a register is available, pass a small aggregate right-justified.
6198  if (Size < 8 && GPR_idx != NumGPRs) {
6199  // The easiest way to get this right-justified in a register
6200  // is to copy the structure into the rightmost portion of a
6201  // local variable slot, then load the whole slot into the
6202  // register.
6203  // FIXME: The memcpy seems to produce pretty awful code for
6204  // small aggregates, particularly for packed ones.
6205  // FIXME: It would be preferable to use the slot in the
6206  // parameter save area instead of a new local variable.
6207  SDValue AddPtr = PtrOff;
6208  if (!isLittleEndian) {
6209  SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6210  AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6211  }
6212  Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6213  CallSeqStart,
6214  Flags, DAG, dl);
6215 
6216  // Load the slot into the register.
6217  SDValue Load =
6218  DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6219  MemOpChains.push_back(Load.getValue(1));
6220  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6221 
6222  // Done with this argument.
6223  ArgOffset += PtrByteSize;
6224  continue;
6225  }
6226 
6227  // For aggregates larger than PtrByteSize, copy the pieces of the
6228  // object that fit into registers from the parameter save area.
6229  for (unsigned j=0; j<Size; j+=PtrByteSize) {
6230  SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6231  SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6232  if (GPR_idx != NumGPRs) {
6233  SDValue Load =
6234  DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
6235  MemOpChains.push_back(Load.getValue(1));
6236  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6237  ArgOffset += PtrByteSize;
6238  } else {
6239  ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6240  break;
6241  }
6242  }
6243  continue;
6244  }
6245 
6246  switch (Arg.getSimpleValueType().SimpleTy) {
6247  default: llvm_unreachable("Unexpected ValueType for argument!");
6248  case MVT::i1:
6249  case MVT::i32:
6250  case MVT::i64:
6251  if (Flags.isNest()) {
6252  // The 'nest' parameter, if any, is passed in R11.
6253  RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6254  break;
6255  }
6256 
6257  // These can be scalar arguments or elements of an integer array type
6258  // passed directly. Clang may use those instead of "byval" aggregate
6259  // types to avoid forcing arguments to memory unnecessarily.
6260  if (GPR_idx != NumGPRs) {
6261  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6262  } else {
6263  if (IsFastCall)
6264  ComputePtrOff();
6265 
6266  assert(HasParameterArea &&
6267  "Parameter area must exist to pass an argument in memory.");
6268  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6269  true, CFlags.IsTailCall, false, MemOpChains,
6270  TailCallArguments, dl);
6271  if (IsFastCall)
6272  ArgOffset += PtrByteSize;
6273  }
6274  if (!IsFastCall)
6275  ArgOffset += PtrByteSize;
6276  break;
6277  case MVT::f32:
6278  case MVT::f64: {
6279  // These can be scalar arguments or elements of a float array type
6280  // passed directly. The latter are used to implement ELFv2 homogeneous
6281  // float aggregates.
6282 
6283  // Named arguments go into FPRs first, and once they overflow, the
6284  // remaining arguments go into GPRs and then the parameter save area.
6285  // Unnamed arguments for vararg functions always go to GPRs and
6286  // then the parameter save area. For now, put all arguments to vararg
6287  // routines always in both locations (FPR *and* GPR or stack slot).
6288  bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6289  bool NeededLoad = false;
6290 
6291  // First load the argument into the next available FPR.
6292  if (FPR_idx != NumFPRs)
6293  RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6294 
6295  // Next, load the argument into GPR or stack slot if needed.
6296  if (!NeedGPROrStack)
6297  ;
6298  else if (GPR_idx != NumGPRs && !IsFastCall) {
6299  // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6300  // once we support fp <-> gpr moves.
6301 
6302  // In the non-vararg case, this can only ever happen in the
6303  // presence of f32 array types, since otherwise we never run
6304  // out of FPRs before running out of GPRs.
6305  SDValue ArgVal;
6306 
6307  // Double values are always passed in a single GPR.
6308  if (Arg.getValueType() != MVT::f32) {
6309  ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6310 
6311  // Non-array float values are extended and passed in a GPR.
6312  } else if (!Flags.isInConsecutiveRegs()) {
6313  ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6314  ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6315 
6316  // If we have an array of floats, we collect every odd element
6317  // together with its predecessor into one GPR.
6318  } else if (ArgOffset % PtrByteSize != 0) {
6319  SDValue Lo, Hi;
6320  Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6321  Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6322  if (!isLittleEndian)
6323  std::swap(Lo, Hi);
6324  ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6325 
6326  // The final element, if even, goes into the first half of a GPR.
6327  } else if (Flags.isInConsecutiveRegsLast()) {
6328  ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6329  ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6330  if (!isLittleEndian)
6331  ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6332  DAG.getConstant(32, dl, MVT::i32));
6333 
6334  // Non-final even elements are skipped; they will be handled
6335  // together with the subsequent argument on the next go-around.
6336  } else
6337  ArgVal = SDValue();
6338 
6339  if (ArgVal.getNode())
6340  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6341  } else {
6342  if (IsFastCall)
6343  ComputePtrOff();
6344 
6345  // Single-precision floating-point values are mapped to the
6346  // second (rightmost) word of the stack doubleword.
6347  if (Arg.getValueType() == MVT::f32 &&
6348  !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6349  SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6350  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6351  }
6352 
6353  assert(HasParameterArea &&
6354  "Parameter area must exist to pass an argument in memory.");
6355  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6356  true, CFlags.IsTailCall, false, MemOpChains,
6357  TailCallArguments, dl);
6358 
6359  NeededLoad = true;
6360  }
6361  // When passing an array of floats, the array occupies consecutive
6362  // space in the argument area; only round up to the next doubleword
6363  // at the end of the array. Otherwise, each float takes 8 bytes.
6364  if (!IsFastCall || NeededLoad) {
6365  ArgOffset += (Arg.getValueType() == MVT::f32 &&
6366  Flags.isInConsecutiveRegs()) ? 4 : 8;
6367  if (Flags.isInConsecutiveRegsLast())
6368  ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6369  }
6370  break;
6371  }
6372  case MVT::v4f32:
6373  case MVT::v4i32:
6374  case MVT::v8i16:
6375  case MVT::v16i8:
6376  case MVT::v2f64:
6377  case MVT::v2i64:
6378  case MVT::v1i128:
6379  case MVT::f128:
6380  // These can be scalar arguments or elements of a vector array type
6381  // passed directly. The latter are used to implement ELFv2 homogeneous
6382  // vector aggregates.
6383 
6384  // For a varargs call, named arguments go into VRs or on the stack as
6385  // usual; unnamed arguments always go to the stack or the corresponding
6386  // GPRs when within range. For now, we always put the value in both
6387  // locations (or even all three).
6388  if (CFlags.IsVarArg) {
6389  assert(HasParameterArea &&
6390  "Parameter area must exist if we have a varargs call.");
6391  // We could elide this store in the case where the object fits
6392  // entirely in R registers. Maybe later.
6393  SDValue Store =
6394  DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6395  MemOpChains.push_back(Store);
6396  if (VR_idx != NumVRs) {
6397  SDValue Load =
6398  DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6399  MemOpChains.push_back(Load.getValue(1));
6400  RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6401  }
6402  ArgOffset += 16;
6403  for (unsigned i=0; i<16; i+=PtrByteSize) {
6404  if (GPR_idx == NumGPRs)
6405  break;
6406  SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6407  DAG.getConstant(i, dl, PtrVT));
6408  SDValue Load =
6409  DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6410  MemOpChains.push_back(Load.getValue(1));
6411  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6412  }
6413  break;
6414  }
6415 
6416  // Non-varargs Altivec params go into VRs or on the stack.
6417  if (VR_idx != NumVRs) {
6418  RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6419  } else {
6420  if (IsFastCall)
6421  ComputePtrOff();
6422 
6423  assert(HasParameterArea &&
6424  "Parameter area must exist to pass an argument in memory.");
6425  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6426  true, CFlags.IsTailCall, true, MemOpChains,
6427  TailCallArguments, dl);
6428  if (IsFastCall)
6429  ArgOffset += 16;
6430  }
6431 
6432  if (!IsFastCall)
6433  ArgOffset += 16;
6434  break;
6435  }
6436  }
6437 
6438  assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6439  "mismatch in size of parameter area");
6440  (void)NumBytesActuallyUsed;
6441 
6442  if (!MemOpChains.empty())
6443  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6444 
6445  // Check if this is an indirect call (MTCTR/BCTRL).
6446  // See prepareDescriptorIndirectCall and buildCallOperands for more
6447  // information about calls through function pointers in the 64-bit SVR4 ABI.
6448  if (CFlags.IsIndirect) {
6449  // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6450  // caller in the TOC save area.
6451  if (isTOCSaveRestoreRequired(Subtarget)) {
6452  assert(!CFlags.IsTailCall && "Indirect tail calls not supported");
6453  // Load r2 into a virtual register and store it to the TOC save area.
6454  setUsesTOCBasePtr(DAG);
6455  SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6456  // TOC save area offset.
6457  unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6458  SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6459  SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6460  Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6461  MachinePointerInfo::getStack(
6462  DAG.getMachineFunction(), TOCSaveOffset));
6463  }
6464  // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6465  // This does not mean the MTCTR instruction must use R12; it's easier
6466  // to model this as an extra parameter, so do that.
6467  if (isELFv2ABI && !CFlags.IsPatchPoint)
6468  RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6469  }
6470 
6471  // Build a sequence of copy-to-reg nodes chained together with token chain
6472  // and flag operands which copy the outgoing args into the appropriate regs.
6473  SDValue InFlag;
6474  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6475  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6476  RegsToPass[i].second, InFlag);
6477  InFlag = Chain.getValue(1);
6478  }
6479 
6480  if (CFlags.IsTailCall && !IsSibCall)
6481  PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6482  TailCallArguments);
6483 
6484  return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
6485  Callee, SPDiff, NumBytes, Ins, InVals, CB);
6486 }
6487 
6488 // Returns true when the shadow of a general purpose argument register
6489 // in the parameter save area is aligned to at least 'RequiredAlign'.
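 // For example, on 64-bit targets X3 shadows parameter save area offset 0 and
 // X4 shadows offset 8, so a 16-byte-aligned argument may start at X3's shadow
 // but not at X4's.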
6490 static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6491  assert(RequiredAlign.value() <= 16 &&
6492  "Required alignment greater than stack alignment.");
6493  switch (Reg) {
6494  default:
6495  report_fatal_error("called on invalid register.");
6496  case PPC::R5:
6497  case PPC::R9:
6498  case PPC::X3:
6499  case PPC::X5:
6500  case PPC::X7:
6501  case PPC::X9:
6502  // These registers are 16-byte aligned, which is the strictest alignment
6503  // we can support.
6504  return true;
6505  case PPC::R3:
6506  case PPC::R7:
6507  case PPC::X4:
6508  case PPC::X6:
6509  case PPC::X8:
6510  case PPC::X10:
6511  // The shadow of these registers in the PSA is 8 byte aligned.
6512  return RequiredAlign <= 8;
6513  case PPC::R4:
6514  case PPC::R6:
6515  case PPC::R8:
6516  case PPC::R10:
6517  return RequiredAlign <= 4;
6518  }
6519 }
6520 
6521 static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6522  CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6523  CCState &S) {
6524  AIXCCState &State = static_cast<AIXCCState &>(S);
6525  const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6526  State.getMachineFunction().getSubtarget());
6527  const bool IsPPC64 = Subtarget.isPPC64();
6528  const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);
6529  const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
6530 
6531  if (ValVT == MVT::f128)
6532  report_fatal_error("f128 is unimplemented on AIX.");
6533 
6534  if (ArgFlags.isNest())
6535  report_fatal_error("Nest arguments are unimplemented.");
6536 
6537  static const MCPhysReg GPR_32[] = {// 32-bit registers.
6538  PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6539  PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6540  static const MCPhysReg GPR_64[] = {// 64-bit registers.
6541  PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6542  PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6543 
6544  static const MCPhysReg VR[] = {// Vector registers.
6545  PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6546  PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6547  PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6548 
6549  if (ArgFlags.isByVal()) {
6550  if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
6551  report_fatal_error("Pass-by-value arguments with alignment greater than "
6552  "register width are not supported.");
6553 
6554  const unsigned ByValSize = ArgFlags.getByValSize();
6555 
6556  // An empty aggregate parameter takes up no storage and no registers,
6557  // but needs a MemLoc for a stack slot for the formal arguments side.
6558  if (ByValSize == 0) {
6559  State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6560  State.getNextStackOffset(), RegVT,
6561  LocInfo));
6562  return false;
6563  }
6564 
6565  const unsigned StackSize = alignTo(ByValSize, PtrAlign);
6566  unsigned Offset = State.AllocateStack(StackSize, PtrAlign);
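  // Walk the aggregate one register width at a time: each chunk that still
  // has a GPR available gets a RegLoc, and once the GPRs are exhausted a
  // single MemLoc at the current offset covers the remainder.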
6567  for (const unsigned E = Offset + StackSize; Offset < E;
6568  Offset += PtrAlign.value()) {
6569  if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
6570  State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6571  else {
6572  State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6573  Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
6574  LocInfo));
6575  break;
6576  }
6577  }
6578  return false;
6579  }
6580 
6581  // Arguments always reserve parameter save area.
6582  switch (ValVT.SimpleTy) {
6583  default:
6584  report_fatal_error("Unhandled value type for argument.");
6585  case MVT::i64:
6586  // i64 arguments should have been split to i32 for PPC32.
6587  assert(IsPPC64 && "PPC32 should have split i64 values.");
6588  LLVM_FALLTHROUGH;
6589  case MVT::i1:
6590  case MVT::i32: {
6591  const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign);
6592  // AIX integer arguments are always passed in register width.
6593  if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6594  LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6595  : CCValAssign::LocInfo::ZExt;
6596  if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
6597  State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6598  else
6599  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6600 
6601  return false;
6602  }
6603  case MVT::f32:
6604  case MVT::f64: {
6605  // Parameter save area (PSA) is reserved even if the float passes in fpr.
6606  const unsigned StoreSize = LocVT.getStoreSize();
6607  // Floats are always 4-byte aligned in the PSA on AIX.
6608  // This includes f64 in 64-bit mode for ABI compatibility.
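  // For example, on 64-bit targets an f32 argument still reserves a full
  // 8-byte slot in the PSA even though only four bytes of it are meaningful.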
6609  const unsigned Offset =
6610  State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6611  unsigned FReg = State.AllocateReg(FPR);
6612  if (FReg)
6613  State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
6614 
6615  // Reserve and initialize GPRs or initialize the PSA as required.
6616  for (unsigned I = 0; I < StoreSize; I += PtrAlign.value()) {
6617  if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
6618  assert(FReg && "An FPR should be available when a GPR is reserved.");
6619  if (State.isVarArg()) {
6620  // Successfully reserved GPRs are only initialized for vararg calls.
6621  // Custom handling is required for:
6622  // f64 in PPC32 needs to be split into 2 GPRs.
6623  // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
6624  State.addLoc(
6625  CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6626  }
6627  } else {
6628  // If there are insufficient GPRs, the PSA needs to be initialized.
6629  // Initialization occurs even if an FPR was initialized for
6630  // compatibility with the AIX XL compiler. The full memory for the
6631  // argument will be initialized even if a prior word is saved in GPR.
6632  // A custom memLoc is used when the argument also passes in FPR so
6633  // that the callee handling can skip over it easily.
6634  State.addLoc(
6635  FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6636  LocInfo)
6637  : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6638  break;
6639  }
6640  }
6641 
6642  return false;
6643  }
6644  case MVT::v4f32:
6645  case MVT::v4i32:
6646  case MVT::v8i16:
6647  case MVT::v16i8:
6648  case MVT::v2i64:
6649  case MVT::v2f64:
6650  case MVT::v1i128: {
6651  const unsigned VecSize = 16;
6652  const Align VecAlign(VecSize);
6653 
6654  if (!State.isVarArg()) {
6655  // If there are vector registers remaining we don't consume any stack
6656  // space.
6657  if (unsigned VReg = State.AllocateReg(VR)) {
6658  State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6659  return false;
6660  }
6661  // Vectors passed on the stack do not shadow GPRs or FPRs even though they
6662  // might be allocated in the portion of the PSA that is shadowed by the
6663  // GPRs.
6664  const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6665  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6666  return false;
6667  }
6668 
6669  const unsigned PtrSize = IsPPC64 ? 8 : 4;
6670  ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6671 
6672  unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
6673  // Burn any underaligned registers and their shadowed stack space until
6674  // we reach the required alignment.
6675  while (NextRegIndex != GPRs.size() &&
6676  !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
6677  // Shadow allocate register and its stack shadow.
6678  unsigned Reg = State.AllocateReg(GPRs);
6679  State.AllocateStack(PtrSize, PtrAlign);
6680  assert(Reg && "Allocating register unexpectedly failed.");
6681  (void)Reg;
6682  NextRegIndex = State.getFirstUnallocated(GPRs);
6683  }
6684 
6685  // Vectors that are passed as fixed arguments are handled differently.
6686  // They are passed in VRs if any are available (unlike arguments passed
6687  // through ellipses) and shadow GPRs (unlike arguments to non-vaarg
6688  // functions)
6689  if (State.isFixed(ValNo)) {
6690  if (unsigned VReg = State.AllocateReg(VR)) {
6691  State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6692  // Shadow allocate GPRs and stack space even though we pass in a VR.
6693  for (unsigned I = 0; I != VecSize; I += PtrSize)
6694  State.AllocateReg(GPRs);
6695  State.AllocateStack(VecSize, VecAlign);
6696  return false;
6697  }
6698  // No vector registers remain so pass on the stack.
6699  const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6700  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6701  return false;
6702  }
6703 
6704  // If all GPRS are consumed then we pass the argument fully on the stack.
6705  if (NextRegIndex == GPRs.size()) {
6706  const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6707  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6708  return false;
6709  }
6710 
6711  // Corner case for 32-bit codegen. We have 2 registers to pass the first
6712  // half of the argument, and then need to pass the remaining half on the
6713  // stack.
6714  if (GPRs[NextRegIndex] == PPC::R9) {
6715  const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6716  State.addLoc(
6717  CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6718 
6719  const unsigned FirstReg = State.AllocateReg(PPC::R9);
6720  const unsigned SecondReg = State.AllocateReg(PPC::R10);
6721  assert(FirstReg && SecondReg &&
6722  "Allocating R9 or R10 unexpectedly failed.");
6723  State.addLoc(
6724  CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
6725  State.addLoc(
6726  CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
6727  return false;
6728  }
6729 
6730  // We have enough GPRs to fully pass the vector argument, and we have
6731  // already consumed any underaligned registers. Start with the custom
6732  // MemLoc and then the custom RegLocs.
6733  const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6734  State.addLoc(
6735  CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6736  for (unsigned I = 0; I != VecSize; I += PtrSize) {
6737  const unsigned Reg = State.AllocateReg(GPRs);
6738  assert(Reg && "Failed to allocated register for vararg vector argument");
6739  State.addLoc(
6740  CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6741  }
6742  return false;
6743  }
6744  }
6745  return true;
6746 }
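The by-value branch of CC_AIX above rounds the aggregate up to a multiple of the register width and hands out one GPR per word until R10/X10 is exhausted; any remaining words live only in the parameter save area. The standalone sketch below models just that word-splitting arithmetic; it is not LLVM code, and the helper name and sample sizes are invented for illustration.

#include <algorithm>
#include <cstdio>

struct ByValSplit {
  unsigned RegsUsed;     // number of argument GPRs consumed
  unsigned BytesOnStack; // bytes of the PSA not covered by a GPR
};

// Mirrors the loop above: one GPR per register-width word until they run out.
static ByValSplit splitByVal(unsigned ByValSize, unsigned PtrByteSize,
                             unsigned FreeGPRs) {
  const unsigned NumWords = (ByValSize + PtrByteSize - 1) / PtrByteSize;
  const unsigned InRegs = std::min(NumWords, FreeGPRs);
  return {InRegs, (NumWords - InRegs) * PtrByteSize};
}

int main() {
  // Hypothetical: a 20-byte struct on 32-bit AIX with 3 argument GPRs left
  // occupies 3 GPRs and leaves 8 bytes only in the parameter save area.
  ByValSplit S = splitByVal(/*ByValSize=*/20, /*PtrByteSize=*/4, /*FreeGPRs=*/3);
  std::printf("%u GPRs, %u bytes on stack\n", S.RegsUsed, S.BytesOnStack);
  return 0;
}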
6747 
6748 // So far, this function is only used by LowerFormalArguments_AIX()
6749 static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
6750  bool IsPPC64,
6751  bool HasP8Vector,
6752  bool HasVSX) {
6753  assert((IsPPC64 || SVT != MVT::i64) &&
6754  "i64 should have been split for 32-bit codegen.");
6755 
6756  switch (SVT) {
6757  default:
6758  report_fatal_error("Unexpected value type for formal argument");
6759  case MVT::i1:
6760  case MVT::i32:
6761  case MVT::i64:
6762  return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
6763  case MVT::f32:
6764  return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
6765  case MVT::f64:
6766  return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
6767  case MVT::v4f32:
6768  case MVT::v4i32:
6769  case MVT::v8i16:
6770  case MVT::v16i8:
6771  case MVT::v2i64:
6772  case MVT::v2f64:
6773  case MVT::v1i128:
6774  return &PPC::VRRCRegClass;
6775  }
6776 }
6777 
6778 static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
6779  SelectionDAG &DAG, SDValue ArgValue,
6780  MVT LocVT, const SDLoc &dl) {
6781  assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
6782  assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
6783 
6784  if (Flags.isSExt())
6785  ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
6786  DAG.getValueType(ValVT));
6787  else if (Flags.isZExt())
6788  ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
6789  DAG.getValueType(ValVT));
6790 
6791  return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
6792 }
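truncateScalarIntegerArg only attaches an AssertSext/AssertZext node describing the extension the caller performed and then truncates back to the declared type; the observable effect is a plain integer truncation. A host-integer sketch of that round trip (the helper name and values are illustrative, not LLVM code):

#include <cstdint>
#include <cstdio>

// The callee receives the value widened to register width (here int32_t) and
// recovers the original i8 by truncation; whether the upper bits were sign-
// or zero-extended does not change the truncated result.
static int8_t truncateScalarArg(int32_t WidenedArg) {
  return static_cast<int8_t>(WidenedArg); // ISD::TRUNCATE analogue
}

int main() {
  int32_t SExt = -5;         // i8 -5 sign-extended to i32
  int32_t ZExt = 0x000000FB; // i8 -5 zero-extended to i32 (two's complement)
  std::printf("%d %d\n", truncateScalarArg(SExt), truncateScalarArg(ZExt));
  return 0;
}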
6793 
6794 static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
6795  const unsigned LASize = FL->getLinkageSize();
6796 
6797  if (PPC::GPRCRegClass.contains(Reg)) {
6798  assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
6799  "Reg must be a valid argument register!");
6800  return LASize + 4 * (Reg - PPC::R3);
6801  }
6802 
6803  if (PPC::G8RCRegClass.contains(Reg)) {
6804  assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
6805  "Reg must be a valid argument register!");
6806  return LASize + 8 * (Reg - PPC::X3);
6807  }
6808 
6809  llvm_unreachable("Only general purpose registers expected.");
6810 }
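mapArgRegToOffsetAIX recovers the parameter-save-area slot that an argument register shadows: the linkage area comes first, then one register-width slot per argument GPR starting at R3/X3. A minimal sketch of the same arithmetic, using the 24/48-byte linkage-area sizes noted in LowerCall_AIX below; the function name and register indexing are illustrative assumptions, not LLVM code.

#include <cassert>
#include <cstdio>

// GPRIndex 0 stands for R3/X3, 1 for R4/X4, and so on.
static unsigned argRegToPSAOffset(unsigned GPRIndex, bool IsPPC64) {
  const unsigned LinkageSize = IsPPC64 ? 48 : 24; // 6 slots of 8 or 4 bytes
  const unsigned RegWidth = IsPPC64 ? 8 : 4;
  assert(GPRIndex < 8 && "Only R3..R10 / X3..X10 carry arguments");
  return LinkageSize + RegWidth * GPRIndex;
}

int main() {
  // R5 (third argument GPR) on 32-bit AIX shadows PSA offset 24 + 4*2 = 32.
  std::printf("%u\n", argRegToPSAOffset(2, /*IsPPC64=*/false));
  return 0;
}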
6811 
6812 // AIX ABI Stack Frame Layout:
6813 //
6814 // Low Memory +--------------------------------------------+
6815 // SP +---> | Back chain | ---+
6816 // | +--------------------------------------------+ |
6817 // | | Saved Condition Register | |
6818 // | +--------------------------------------------+ |
6819 // | | Saved Linkage Register | |
6820 // | +--------------------------------------------+ | Linkage Area
6821 // | | Reserved for compilers | |
6822 // | +--------------------------------------------+ |
6823 // | | Reserved for binders | |
6824 // | +--------------------------------------------+ |
6825 // | | Saved TOC pointer | ---+
6826 // | +--------------------------------------------+
6827 // | | Parameter save area |
6828 // | +--------------------------------------------+
6829 // | | Alloca space |
6830 // | +--------------------------------------------+
6831 // | | Local variable space |
6832 // | +--------------------------------------------+
6833 // | | Float/int conversion temporary |
6834 // | +--------------------------------------------+
6835 // | | Save area for AltiVec registers |
6836 // | +--------------------------------------------+
6837 // | | AltiVec alignment padding |
6838 // | +--------------------------------------------+
6839 // | | Save area for VRSAVE register |
6840 // | +--------------------------------------------+
6841 // | | Save area for General Purpose registers |
6842 // | +--------------------------------------------+
6843 // | | Save area for Floating Point registers |
6844 // | +--------------------------------------------+
6845 // +---- | Back chain |
6846 // High Memory +--------------------------------------------+
6847 //
6848 // Specifications:
6849 // AIX 7.2 Assembler Language Reference
6850 // Subroutine linkage convention
6851 
6852 SDValue PPCTargetLowering::LowerFormalArguments_AIX(
6853  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
6854  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6855  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
6856 
6857  assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
6858  CallConv == CallingConv::Fast) &&
6859  "Unexpected calling convention!");
6860 
6861  if (getTargetMachine().Options.GuaranteedTailCallOpt)
6862  report_fatal_error("Tail call support is unimplemented on AIX.");
6863 
6864  if (useSoftFloat())
6865  report_fatal_error("Soft float support is unimplemented on AIX.");
6866 
6867  const PPCSubtarget &Subtarget =
6868  static_cast<const PPCSubtarget &>(DAG.getSubtarget());
6869 
6870  const bool IsPPC64 = Subtarget.isPPC64();
6871  const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
6872 
6873  // Assign locations to all of the incoming arguments.
6874  SmallVector<CCValAssign, 16> ArgLocs;
6875  MachineFunction &MF = DAG.getMachineFunction();
6876  MachineFrameInfo &MFI = MF.getFrameInfo();
6877  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
6878  AIXCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
6879 
6880  const EVT PtrVT = getPointerTy(MF.getDataLayout());
6881  // Reserve space for the linkage area on the stack.
6882  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6883  CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
6884  CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
6885 
6886  SmallVector<SDValue, 8> MemOps;
6887 
6888  for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
6889  CCValAssign &VA = ArgLocs[I++];
6890  MVT LocVT = VA.getLocVT();
6891  MVT ValVT = VA.getValVT();
6892  ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
6893  // For compatibility with the AIX XL compiler, the float args in the
6894  // parameter save area are initialized even if the argument is available
6895  // in register. The caller is required to initialize both the register
6896  // and memory, however, the callee can choose to expect it in either.
6897  // The memloc is dismissed here because the argument is retrieved from
6898  // the register.
6899  if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
6900  continue;
6901 
6902  auto HandleMemLoc = [&]() {
6903  const unsigned LocSize = LocVT.getStoreSize();
6904  const unsigned ValSize = ValVT.getStoreSize();
6905  assert((ValSize <= LocSize) &&
6906  "Object size is larger than size of MemLoc");
6907  int CurArgOffset = VA.getLocMemOffset();
6908  // Objects are right-justified because AIX is big-endian.
6909  if (LocSize > ValSize)
6910  CurArgOffset += LocSize - ValSize;
6911  // Potential tail calls could cause overwriting of argument stack slots.
6912  const bool IsImmutable =
6913  !(getTargetMachine().Options.GuaranteedTailCallOpt &&
6914  (CallConv == CallingConv::Fast));
6915  int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
6916  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
6917  SDValue ArgValue =
6918  DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
6919  InVals.push_back(ArgValue);
6920  };
6921 
6922  // Vector arguments to VaArg functions are passed both on the stack, and
6923  // in any available GPRs. Load the value from the stack and add the GPRs
6924  // as live ins.
6925  if (VA.isMemLoc() && VA.needsCustom()) {
6926  assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
6927  assert(isVarArg && "Only use custom memloc for vararg.");
6928  // ValNo of the custom MemLoc, so we can compare it to the ValNo of the
6929  // matching custom RegLocs.
6930  const unsigned OriginalValNo = VA.getValNo();
6931  (void)OriginalValNo;
6932 
6933  auto HandleCustomVecRegLoc = [&]() {
6934  assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
6935  "Missing custom RegLoc.");
6936  VA = ArgLocs[I++];
6937  assert(VA.getValVT().isVector() &&
6938  "Unexpected Val type for custom RegLoc.");
6939  assert(VA.getValNo() == OriginalValNo &&
6940  "ValNo mismatch between custom MemLoc and RegLoc.");
6941  const MVT::SimpleValueType SVT = VA.getLocVT().SimpleTy;
6942  MF.addLiveIn(VA.getLocReg(),
6943  getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
6944  Subtarget.hasVSX()));
6945  };
6946 
6947  HandleMemLoc();
6948  // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
6949  // 32-bit there will be 2 custom RegLocs if we are passing in R9 and
6950  // R10.
6951  HandleCustomVecRegLoc();
6952  HandleCustomVecRegLoc();
6953 
6954  // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
6955  // we passed the vector in R5, R6, R7 and R8.
6956  if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
6957  assert(!IsPPC64 &&
6958  "Only 2 custom RegLocs expected for 64-bit codegen.");
6959  HandleCustomVecRegLoc();
6960  HandleCustomVecRegLoc();
6961  }
6962 
6963  continue;
6964  }
6965 
6966  if (VA.isRegLoc()) {
6967  if (VA.getValVT().isScalarInteger())
6968  FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
6969  else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
6970  switch (VA.getValVT().SimpleTy) {
6971  default:
6972  report_fatal_error("Unhandled value type for argument.");
6973  case MVT::f32:
6974  FuncInfo->appendParameterType(PPCFunctionInfo::ShortFloatingPoint);
6975  break;
6976  case MVT::f64:
6977  FuncInfo->appendParameterType(PPCFunctionInfo::LongFloatingPoint);
6978  break;
6979  }
6980  } else if (VA.getValVT().isVector()) {
6981  switch (VA.getValVT().SimpleTy) {
6982  default:
6983  report_fatal_error("Unhandled value type for argument.");
6984  case MVT::v16i8:
6985  FuncInfo->appendParameterType(PPCFunctionInfo::VectorChar);
6986  break;
6987  case MVT::v8i16:
6988  FuncInfo->appendParameterType(PPCFunctionInfo::VectorShort);
6989  break;
6990  case MVT::v4i32:
6991  case MVT::v2i64:
6992  case MVT::v1i128:
6993  FuncInfo->appendParameterType(PPCFunctionInfo::VectorInt);
6994  break;
6995  case MVT::v4f32:
6996  case MVT::v2f64:
6997  FuncInfo->appendParameterType(PPCFunctionInfo::VectorFloat);
6998  break;
6999  }
7000  }
7001  }
7002 
7003  if (Flags.isByVal() && VA.isMemLoc()) {
7004  const unsigned Size =
7005  alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7006  PtrByteSize);
7007  const int FI = MF.getFrameInfo().CreateFixedObject(
7008  Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7009  /* IsAliased */ true);
7010  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7011  InVals.push_back(FIN);
7012 
7013  continue;
7014  }
7015 
7016  if (Flags.isByVal()) {
7017  assert(VA.isRegLoc() && "MemLocs should already be handled.");
7018 
7019  const MCPhysReg ArgReg = VA.getLocReg();
7020  const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7021 
7022  if (Flags.getNonZeroByValAlign() > PtrByteSize)
7023  report_fatal_error("Over aligned byvals not supported yet.");
7024 
7025  const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7026  const int FI = MF.getFrameInfo().CreateFixedObject(
7027  StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7028  /* IsAliased */ true);
7029  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7030  InVals.push_back(FIN);
7031 
7032  // Add live ins for all the RegLocs for the same ByVal.
7033  const TargetRegisterClass *RegClass =
7034  IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7035 
7036  auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7037  unsigned Offset) {
7038  const unsigned VReg = MF.addLiveIn(PhysReg, RegClass);
7039  // Since the callers side has left justified the aggregate in the
7040  // register, we can simply store the entire register into the stack
7041  // slot.
7042  SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7043  // The store to the fixedstack object is needed because accessing a
7044  // field of the ByVal will use a gep and load. Ideally we will optimize
7045  // to extracting the value from the register directly, and elide the
7046  // stores when the argument's address is not taken, but that will need to
7047  // be future work.
7048  SDValue Store = DAG.getStore(
7049  CopyFrom.getValue(1), dl, CopyFrom,
7050  DAG.getObjectPtrOffset(dl, FIN, TypeSize::Fixed(Offset)),
7051  MachinePointerInfo::getFixedStack(MF, FI, Offset));
7052 
7053  MemOps.push_back(Store);
7054  };
7055 
7056  unsigned Offset = 0;
7057  HandleRegLoc(VA.getLocReg(), Offset);
7058  Offset += PtrByteSize;
7059  for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7060  Offset += PtrByteSize) {
7061  assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7062  "RegLocs should be for ByVal argument.");
7063 
7064  const CCValAssign RL = ArgLocs[I++];
7065  HandleRegLoc(RL.getLocReg(), Offset);
7067  }
7068 
7069  if (Offset != StackSize) {
7070  assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7071  "Expected MemLoc for remaining bytes.");
7072  assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7073  // Consume the MemLoc. The InVal has already been emitted, so nothing
7074  // more needs to be done.
7075  ++I;
7076  }
7077 
7078  continue;
7079  }
7080 
7081  if (VA.isRegLoc() && !VA.needsCustom()) {
7082  MVT::SimpleValueType SVT = ValVT.SimpleTy;
7083  Register VReg =
7084  MF.addLiveIn(VA.getLocReg(),
7085  getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7086  Subtarget.hasVSX()));
7087  SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7088  if (ValVT.isScalarInteger() &&
7089  (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7090  ArgValue =
7091  truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7092  }
7093  InVals.push_back(ArgValue);
7094  continue;
7095  }
7096  if (VA.isMemLoc()) {
7097  HandleMemLoc();
7098  continue;
7099  }
7100  }
7101 
7102  // On AIX a minimum of 8 words is saved to the parameter save area.
7103  const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7104  // Area that is at least reserved in the caller of this function.
7105  unsigned CallerReservedArea =
7106  std::max(CCInfo.getNextStackOffset(), LinkageSize + MinParameterSaveArea);
7107 
7108  // Set the size that is at least reserved in caller of this function. Tail
7109  // call optimized function's reserved stack space needs to be aligned so
7110  // that taking the difference between two stack areas will result in an
7111  // aligned stack.
7112  CallerReservedArea =
7113  EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7114  FuncInfo->setMinReservedArea(CallerReservedArea);
7115 
7116  if (isVarArg) {
7117  FuncInfo->setVarArgsFrameIndex(
7118  MFI.CreateFixedObject(PtrByteSize, CCInfo.getNextStackOffset(), true));
7119  SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
7120 
7121  static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7122  PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7123 
7124  static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7125  PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7126  const unsigned NumGPArgRegs = array_lengthof(IsPPC64 ? GPR_64 : GPR_32);
7127 
7128  // The fixed integer arguments of a variadic function are stored to the
7129  // VarArgsFrameIndex on the stack so that they may be loaded by
7130  // dereferencing the result of va_next.
7131  for (unsigned GPRIndex =
7132  (CCInfo.getNextStackOffset() - LinkageSize) / PtrByteSize;
7133  GPRIndex < NumGPArgRegs; ++GPRIndex) {
7134 
7135  const unsigned VReg =
7136  IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7137  : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7138 
7139  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7140  SDValue Store =
7141  DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
7142  MemOps.push_back(Store);
7143  // Increment the address for the next argument to store.
7144  SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7145  FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7146  }
7147  }
7148 
7149  if (!MemOps.empty())
7150  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7151 
7152  return Chain;
7153 }
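For a variadic function, the loop above spills the remaining argument GPRs starting at the word just past the last named argument, computed as (nextStackOffset - LinkageSize) / PtrByteSize. A small sketch of that index computation; the helper name and example values are assumptions for illustration, not LLVM code.

#include <cstdio>

// The nameless GPRs of a variadic function are spilled starting at the word
// just past the parameter-save-area space reserved for the named arguments.
static unsigned firstVarArgGPRIndex(unsigned NextStackOffset,
                                    unsigned LinkageSize,
                                    unsigned PtrByteSize) {
  return (NextStackOffset - LinkageSize) / PtrByteSize;
}

int main() {
  // 64-bit AIX example: 48-byte linkage area, two named integer arguments
  // reserve 16 bytes of PSA, so spilling starts at index 2 (X5).
  std::printf("%u\n", firstVarArgGPRIndex(48 + 16, 48, 8));
  return 0;
}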
7154 
7155 SDValue PPCTargetLowering::LowerCall_AIX(
7156  SDValue Chain, SDValue Callee, CallFlags CFlags,
7157  const SmallVectorImpl<ISD::OutputArg> &Outs,
7158  const SmallVectorImpl<SDValue> &OutVals,
7159  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7160  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7161  const CallBase *CB) const {
7162  // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7163  // AIX ABI stack frame layout.
7164 
7165  assert((CFlags.CallConv == CallingConv::C ||
7166  CFlags.CallConv == CallingConv::Cold ||
7167  CFlags.CallConv == CallingConv::Fast) &&
7168  "Unexpected calling convention!");
7169 
7170  if (CFlags.IsPatchPoint)
7171  report_fatal_error("This call type is unimplemented on AIX.");
7172 
7173  const PPCSubtarget& Subtarget =
7174  static_cast<const PPCSubtarget&>(DAG.getSubtarget());
7175 
7176  MachineFunction &MF = DAG.getMachineFunction();
7178  AIXCCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7179  *DAG.getContext());
7180 
7181  // Reserve space for the linkage save area (LSA) on the stack.
7182  // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7183  // [SP][CR][LR][2 x reserved][TOC].
7184  // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7185  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7186  const bool IsPPC64 = Subtarget.isPPC64();
7187  const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7188  const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7189  CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7190  CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7191 
7192  // The prolog code of the callee may store up to 8 GPR argument registers to
7193  // the stack, allowing va_start to index over them in memory if the callee
7194  // is variadic.
7195  // Because we cannot tell if this is needed on the caller side, we have to
7196  // conservatively assume that it is needed. As such, make sure we have at
7197  // least enough stack space for the caller to store the 8 GPRs.
7198  const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7199  const unsigned NumBytes = std::max(LinkageSize + MinParameterSaveAreaSize,
7200  CCInfo.getNextStackOffset());
7201 
7202  // Adjust the stack pointer for the new arguments...
7203  // These operations are automatically eliminated by the prolog/epilog pass.
7204  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7205  SDValue CallSeqStart = Chain;
7206 
7207  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
7208  SmallVector<SDValue, 8> MemOpChains;
7209 
7210  // Set up a copy of the stack pointer for loading and storing any
7211  // arguments that may not fit in the registers available for argument
7212  // passing.
7213  const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7214  : DAG.getRegister(PPC::R1, MVT::i32);
7215 
7216  for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7217  const unsigned ValNo = ArgLocs[I].getValNo();
7218  SDValue Arg = OutVals[ValNo];
7219  ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7220 
7221  if (Flags.isByVal()) {
7222  const unsigned ByValSize = Flags.getByValSize();
7223 
7224  // Nothing to do for zero-sized ByVals on the caller side.
7225  if (!ByValSize) {
7226  ++I;
7227  continue;
7228  }
7229 
7230  auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7231  return DAG.getExtLoad(
7232  ISD::ZEXTLOAD, dl, PtrVT, Chain,
7233  (LoadOffset != 0)
7234  ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
7235  : Arg,
7236  MachinePointerInfo(), VT);
7237  };
7238 
7239  unsigned LoadOffset = 0;
7240 
7241  // Initialize registers, which are fully occupied by the by-val argument.
7242  while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7243  SDValue Load = GetLoad(PtrVT, LoadOffset);
7244  MemOpChains.push_back(Load.getValue(1));
7245  LoadOffset += PtrByteSize;
7246  const CCValAssign &ByValVA = ArgLocs[I++];
7247  assert(ByValVA.getValNo() == ValNo &&
7248  "Unexpected location for pass-by-value argument.");
7249  RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7250  }
7251 
7252  if (LoadOffset == ByValSize)
7253  continue;
7254 
7255  // There must be one more loc to handle the remainder.
7256  assert(ArgLocs[I].getValNo() == ValNo &&
7257  "Expected additional location for by-value argument.");
7258 
7259  if (ArgLocs[I].isMemLoc()) {
7260  assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7261  const CCValAssign &ByValVA = ArgLocs[I++];
7262  ISD::ArgFlagsTy MemcpyFlags = Flags;
7263  // Only memcpy the bytes that don't pass in register.
7264  MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7265  Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7266  (LoadOffset != 0)
7267  ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
7268  : Arg,
7269  DAG.getObjectPtrOffset(dl, StackPtr,
7270  TypeSize::Fixed(ByValVA.getLocMemOffset())),
7271  CallSeqStart, MemcpyFlags, DAG, dl);
7272  continue;
7273  }
7274 
7275  // Initialize the final register residue.
7276  // Any residue that occupies the final by-val arg register must be
7277  // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7278  // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7279  // 2 and 1 byte loads.
7280  const unsigned ResidueBytes = ByValSize % PtrByteSize;
7281  assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7282  "Unexpected register residue for by-value argument.");
7283  SDValue ResidueVal;
7284  for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7285  const unsigned N = PowerOf2Floor(ResidueBytes - Bytes);
7286  const MVT VT =
7287  N == 1 ? MVT::i8
7288  : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7289  SDValue Load = GetLoad(VT, LoadOffset);
7290  MemOpChains.push_back(Load.getValue(1));
7291  LoadOffset += N;
7292  Bytes += N;
7293 
7294  // By-val arguments are passed left-justified in register.
7295  // Every load here needs to be shifted, otherwise a full register load
7296  // should have been used.
7297  assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7298  "Unexpected load emitted during handling of pass-by-value "
7299  "argument.");
7300  unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7301  EVT ShiftAmountTy =
7302  getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7303  SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7304  SDValue ShiftedLoad =
7305  DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7306  ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7307  ShiftedLoad)
7308  : ShiftedLoad;
7309  }
7310 
7311  const CCValAssign &ByValVA = ArgLocs[I++];
7312  RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7313  continue;
7314  }
7315 
7316  CCValAssign &VA = ArgLocs[I++];
7317  const MVT LocVT = VA.getLocVT();
7318  const MVT ValVT = VA.getValVT();
7319 
7320  switch (VA.getLocInfo()) {
7321  default:
7322  report_fatal_error("Unexpected argument extension type.");
7323  case CCValAssign::Full:
7324  break;
7325  case CCValAssign::ZExt:
7326  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7327  break;
7328  case CCValAssign::SExt:
7329  Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7330  break;
7331  }
7332 
7333  if (VA.isRegLoc() && !VA.needsCustom()) {
7334  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7335  continue;
7336  }
7337 
7338  // Vector arguments passed to VarArg functions need custom handling when
7339  // they are passed (at least partially) in GPRs.
7340  if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
7341  assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
7342  // Store value to its stack slot.
7343  SDValue PtrOff =
7344  DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7345  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7346  SDValue Store =
7347  DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
7348  MemOpChains.push_back(Store);
7349  const unsigned OriginalValNo = VA.getValNo();
7350  // Then load the GPRs from the stack
7351  unsigned LoadOffset = 0;
7352  auto HandleCustomVecRegLoc = [&]() {
7353  assert(I != E && "Unexpected end of CCvalAssigns.");
7354  assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7355  "Expected custom RegLoc.");
7356  CCValAssign RegVA = ArgLocs[I++];
7357  assert(RegVA.getValNo() == OriginalValNo &&
7358  "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7359  SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
7360  DAG.getConstant(LoadOffset, dl, PtrVT));
7361  SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
7362  MemOpChains.push_back(Load.getValue(1));
7363  RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
7364  LoadOffset += PtrByteSize;
7365  };
7366 
7367  // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7368  // 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7369  // R10.
7370  HandleCustomVecRegLoc();
7371  HandleCustomVecRegLoc();
7372 
7373  if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7374  ArgLocs[I].getValNo() == OriginalValNo) {
7375  assert(!IsPPC64 &&
7376  "Only 2 custom RegLocs expected for 64-bit codegen.");
7377  HandleCustomVecRegLoc();
7378  HandleCustomVecRegLoc();
7379  }
7380 
7381  continue;
7382  }
7383 
7384  if (VA.isMemLoc()) {
7385  SDValue PtrOff =
7386  DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7387  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7388  MemOpChains.push_back(
7389  DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
7390 
7391  continue;
7392  }
7393 
7394  if (!ValVT.isFloatingPoint())
7395  report_fatal_error(
7396  "Unexpected register handling for calling convention.");
7397 
7398  // Custom handling is used for GPR initializations for vararg float
7399  // arguments.
7400  assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7401  LocVT.isInteger() &&
7402  "Custom register handling only expected for VarArg.");
7403 
7404  SDValue ArgAsInt =
7405  DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7406 
7407  if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7408  // f32 in 32-bit GPR
7409  // f64 in 64-bit GPR
7410  RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7411  else if (Arg.getValueType().getFixedSizeInBits() <
7412  LocVT.getFixedSizeInBits())
7413  // f32 in 64-bit GPR.
7414  RegsToPass.push_back(std::make_pair(
7415  VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7416  else {
7417  // f64 in two 32-bit GPRs
7418  // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7419  assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7420  "Unexpected custom register for argument!");
7421  CCValAssign &GPR1 = VA;
7422  SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7423  DAG.getConstant(32, dl, MVT::i8));
7424  RegsToPass.push_back(std::make_pair(
7425  GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7426 
7427  if (I != E) {
7428  // If only 1 GPR was available, there will only be one custom GPR and
7429  // the argument will also pass in memory.
7430  CCValAssign &PeekArg = ArgLocs[I];
7431  if (PeekArg.isRegLoc() && PeekArg.getValNo() == VA.getValNo()) {
7432  assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7433  CCValAssign &GPR2 = ArgLocs[I++];
7434  RegsToPass.push_back(std::make_pair(
7435  GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7436  }
7437  }
7438  }
7439  }
7440 
7441  if (!MemOpChains.empty())
7442  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7443 
7444  // For indirect calls, we need to save the TOC base to the stack for
7445  // restoration after the call.
7446  if (CFlags.IsIndirect) {
7447  assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7448  const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7449  const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7450  const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
7451  const unsigned TOCSaveOffset =
7452  Subtarget.getFrameLowering()->getTOCSaveOffset();
7453 
7454  setUsesTOCBasePtr(DAG);
7455  SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7456  SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7457  SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7458  SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7459  Chain = DAG.getStore(
7460  Val.getValue(1), dl, Val, AddPtr,
7461  MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7462  }
7463 
7464  // Build a sequence of copy-to-reg nodes chained together with token chain
7465  // and flag operands which copy the outgoing args into the appropriate regs.
7466  SDValue InFlag;
7467  for (auto Reg : RegsToPass) {
7468  Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InFlag);
7469  InFlag = Chain.getValue(1);
7470  }
7471 
7472  const int SPDiff = 0;
7473  return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
7474  Callee, SPDiff, NumBytes, Ins, InVals, CB);
7475 }
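The by-value residue loop in LowerCall_AIX above decomposes the bytes that only partially fill the last GPR into power-of-two sized loads and left-justifies each one with a shift, since AIX is big-endian. The following standalone sketch reproduces that decomposition for a hypothetical 3-byte residue in a 32-bit register; it is an illustration, not LLVM code.

#include <cstdio>

int main() {
  const unsigned PtrBits = 32; // 32-bit GPR for this example
  const unsigned Residue = 3;  // e.g. a 7-byte struct, 4 bytes already in a GPR
  unsigned Bytes = 0;
  while (Bytes != Residue) {
    unsigned N = 1;
    while (N * 2 <= Residue - Bytes) // PowerOf2Floor analogue
      N *= 2;
    Bytes += N;
    // Each partial load is shifted so its bytes end up left-justified.
    std::printf("load %u byte(s), shift left by %u\n", N, PtrBits - Bytes * 8);
  }
  return 0;
}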
7476 
7477 bool
7478 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7479  MachineFunction &MF, bool isVarArg,
7480  const SmallVectorImpl<ISD::OutputArg> &Outs,
7481  LLVMContext &Context) const {
7482  SmallVector<CCValAssign, 16> RVLocs;
7483  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7484  return CCInfo.CheckReturn(
7485  Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7486  ? RetCC_PPC_Cold
7487  : RetCC_PPC);
7488 }
7489 
7490 SDValue
7491 PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7492  bool isVarArg,
7493  const SmallVectorImpl<ISD::OutputArg> &Outs,
7494  const SmallVectorImpl<SDValue> &OutVals,
7495  const SDLoc &dl, SelectionDAG &DAG) const {
7496  SmallVector<CCValAssign, 16> RVLocs;
7497  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7498  *DAG.getContext());
7499  CCInfo.AnalyzeReturn(Outs,
7500  (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7501  ? RetCC_PPC_Cold
7502  : RetCC_PPC);
7503 
7504  SDValue Flag;
7505  SmallVector<SDValue, 4> RetOps(1, Chain);
7506 
7507  // Copy the result values into the output registers.
7508  for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7509  CCValAssign &VA = RVLocs[i];
7510  assert(VA.isRegLoc() && "Can only return in registers!");
7511 
7512  SDValue Arg = OutVals[RealResIdx];
7513 
7514  switch (VA.getLocInfo()) {
7515  default: llvm_unreachable("Unknown loc info!");
7516  case CCValAssign::Full: break;
7517  case CCValAssign::AExt:
7518  Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7519  break;
7520  case CCValAssign::ZExt:
7521  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7522  break;
7523  case CCValAssign::SExt:
7524  Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7525  break;
7526  }
7527  if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7528  bool isLittleEndian = Subtarget.isLittleEndian();
7529  // Legalize ret f64 -> ret 2 x i32.
7530  SDValue SVal =
7531  DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7532  DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7533  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
7534  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7535  SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7536  DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7537  Flag = Chain.getValue(1);
7538  VA = RVLocs[++i]; // skip ahead to next loc
7539  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
7540  } else
7541  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
7542  Flag = Chain.getValue(1);
7543  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7544  }
7545 
7546  RetOps[0] = Chain; // Update chain.
7547 
7548  // Add the flag if we have it.
7549  if (Flag.getNode())
7550  RetOps.push_back(Flag);
7551 
7552  return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
7553 }
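With SPE, an f64 return value is legalized into two i32 halves, and endianness decides which half is copied into the first return register. A host-side sketch of the bit-level split; the constant is arbitrary and this is not LLVM code.

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  double D = 1.0; // arbitrary example value
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits)); // view the f64 as raw bits
  uint32_t Hi = static_cast<uint32_t>(Bits >> 32);
  uint32_t Lo = static_cast<uint32_t>(Bits);
  // Which half goes into the first return register is chosen by the
  // isLittleEndian selects in LowerReturn above.
  std::printf("hi=0x%08x lo=0x%08x\n", (unsigned)Hi, (unsigned)Lo);
  return 0;
}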
7554 
7555 SDValue
7556 PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7557  SelectionDAG &DAG) const {
7558  SDLoc dl(Op);
7559 
7560  // Get the correct type for integers.
7561  EVT IntVT = Op.getValueType();
7562 
7563  // Get the inputs.
7564  SDValue Chain = Op.getOperand(0);
7565  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7566  // Build a DYNAREAOFFSET node.
7567  SDValue Ops[2] = {Chain, FPSIdx};
7568  SDVTList VTs = DAG.getVTList(IntVT);
7569  return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7570 }
7571 
7572 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7573  SelectionDAG &DAG) const {
7574  // When we pop the dynamic allocation we need to restore the SP link.
7575  SDLoc dl(Op);
7576 
7577  // Get the correct type for pointers.
7578  EVT PtrVT = getPointerTy(DAG.getDataLayout());
7579 
7580  // Construct the stack pointer operand.
7581  bool isPPC64 = Subtarget.isPPC64();
7582  unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7583  SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7584 
7585  // Get the operands for the STACKRESTORE.
7586  SDValue Chain = Op.getOperand(0);
7587  SDValue SaveSP = Op.getOperand(1);
7588 
7589  // Load the old link SP.
7590  SDValue LoadLinkSP =
7591  DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7592 
7593  // Restore the stack pointer.
7594  Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7595 
7596  // Store the old link SP.
7597  return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7598 }
7599 
7600 SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7601  MachineFunction &MF = DAG.getMachineFunction();
7602  bool isPPC64 = Subtarget.isPPC64();
7603  EVT PtrVT = getPointerTy(MF.getDataLayout());
7604 
7605  // Get the current return address save index. The users of this index
7606  // will primarily be the RETURNADDR lowering.
7607  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7608  int RASI = FI->getReturnAddrSaveIndex();
7609 
7610  // If the return address save index hasn't been defined yet.
7611  if (!RASI) {
7612  // Find out the fixed offset of the return address save area.
7613  int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7614  // Allocate the frame index for the return address save area.
7615  RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7616  // Save the result.
7617  FI->setReturnAddrSaveIndex(RASI);
7618  }
7619  return DAG.getFrameIndex(RASI, PtrVT);
7620 }
7621 
7622 SDValue
7623 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7624  MachineFunction &MF = DAG.getMachineFunction();
7625  bool isPPC64 = Subtarget.isPPC64();
7626  EVT PtrVT = getPointerTy(MF.getDataLayout());
7627 
7628  // Get current frame pointer save index. The users of this index will be
7629  // primarily DYNALLOC instructions.
7630  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7631  int FPSI = FI->getFramePointerSaveIndex();
7632 
7633  // If the frame pointer save index hasn't been defined yet.
7634  if (!FPSI) {
7636  // Find out the fixed offset of the frame pointer save area.
7636  int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7637  // Allocate the frame index for frame pointer save area.
7638  FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7639  // Save the result.
7640  FI->setFramePointerSaveIndex(FPSI);
7641  }
7642  return DAG.getFrameIndex(FPSI, PtrVT);
7643 }
7644 
7645 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7646  SelectionDAG &DAG) const {
7647  MachineFunction &MF = DAG.getMachineFunction();
7648  // Get the inputs.
7649  SDValue Chain = Op.getOperand(0);
7650  SDValue Size = Op.getOperand(1);
7651  SDLoc dl(Op);
7652 
7653  // Get the correct type for pointers.
7654  EVT PtrVT = getPointerTy(DAG.getDataLayout());
7655  // Negate the size.
7656  SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
7657  DAG.getConstant(0, dl, PtrVT), Size);
7658  // Construct a node for the frame pointer save index.
7659  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7660  SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7661  SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
7662  if (hasInlineStackProbe(MF))
7663  return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
7664  return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
7665 }
7666 
7667 SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7668  SelectionDAG &DAG) const {
7669  MachineFunction &MF = DAG.getMachineFunction();
7670 
7671  bool isPPC64 = Subtarget.isPPC64();
7672  EVT PtrVT = getPointerTy(DAG.getDataLayout());
7673 
7674  int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
7675  return DAG.getFrameIndex(FI, PtrVT);
7676 }
7677 
7678 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7679  SelectionDAG &DAG) const {
7680  SDLoc DL(Op);
7681  return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
7682  DAG.getVTList(MVT::i32, MVT::Other),
7683  Op.getOperand(0), Op.getOperand(1));
7684 }
7685 
7686 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7687  SelectionDAG &DAG) const {
7688  SDLoc DL(Op);
7689  return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
7690  Op.getOperand(0), Op.getOperand(1));
7691 }
7692 
7693 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7694  if (Op.getValueType().isVector())
7695  return LowerVectorLoad(Op, DAG);
7696 
7697  assert(Op.getValueType() == MVT::i1 &&
7698  "Custom lowering only for i1 loads");
7699 
7700  // First, load 8 bits into 32 bits, then truncate to 1 bit.
7701 
7702  SDLoc dl(Op);
7703  LoadSDNode *LD = cast<LoadSDNode>(Op);
7704 
7705  SDValue Chain = LD->getChain();
7706  SDValue BasePtr = LD->getBasePtr();
7707  MachineMemOperand *MMO = LD->getMemOperand();
7708 
7709  SDValue NewLD =
7710  DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
7711  BasePtr, MVT::i8, MMO);
7712  SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
7713 
7714  SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
7715  return DAG.getMergeValues(Ops, dl);
7716 }
7717 
7718 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
7719  if (Op.getOperand(1).getValueType().isVector())
7720  return LowerVectorStore(Op, DAG);
7721 
7722  assert(Op.getOperand(1).getValueType() == MVT::i1 &&
7723  "Custom lowering only for i1 stores");
7724 
7725  // First, zero extend to 32 bits, then use a truncating store to 8 bits.
7726 
7727  SDLoc dl(Op);
7728  StoreSDNode *ST = cast<StoreSDNode>(Op);
7729 
7730  SDValue Chain = ST->getChain();
7731  SDValue BasePtr = ST->getBasePtr();
7732  SDValue Value = ST->getValue();
7733  MachineMemOperand *MMO = ST->getMemOperand();
7734 
7735  Value = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32,
7736  Value);
7737  return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
7738 }
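LowerLOAD and LowerSTORE above keep i1 values in memory as a single byte: stores zero-extend to i32 and then truncate-store 8 bits, and loads extend 8 bits and truncate back to 1 bit. The round trip, modeled with host integers purely for illustration:

#include <cstdint>
#include <cstdio>

int main() {
  bool In = true;
  uint8_t MemByte = static_cast<uint8_t>(In);      // truncating 8-bit store
  bool Out = (static_cast<uint32_t>(MemByte) & 1); // extending load + trunc to i1
  std::printf("%d\n", Out ? 1 : 0);
  return 0;
}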
7739 
7740 // FIXME: Remove this once the ANDI glue bug is fixed:
7741 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
7742  assert(Op.getValueType() == MVT::i1 &&
7743  "Custom lowering only for i1 results");
7744 
7745  SDLoc DL(Op);
7746  return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
7747 }
7748 
7749 SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
7750  SelectionDAG &DAG) const {
7751 
7752  // Implements a vector truncate that fits in a vector register as a shuffle.
7753  // We want to legalize vector truncates down to where the source fits in
7754  // a vector register (and target is therefore smaller than vector register
7755  // size). At that point legalization will try to custom lower the sub-legal
7756  // result and get here - where we can contain the truncate as a single target
7757  // operation.
7758 
7759  // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
7760  // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
7761  //
7762  // We will implement it for big-endian ordering as this (where x denotes
7763  // undefined):
7764  // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
7765  // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
7766  //
7767  // The same operation in little-endian ordering will be:
7768  // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
7769  // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
7770 
7771  EVT TrgVT = Op.getValueType();
7772  assert(TrgVT.isVector() && "Vector type expected.");
7773  unsigned TrgNumElts = TrgVT.getVectorNumElements();
7774  EVT EltVT = TrgVT.getVectorElementType();
7775  if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
7776  TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
7777  !isPowerOf2_32(EltVT.getSizeInBits()))
7778  return SDValue();
7779 
7780  SDValue N1 = Op.getOperand(0);
7781  EVT SrcVT = N1.getValueType();
7782  unsigned SrcSize = SrcVT.getSizeInBits();
7783  if (SrcSize > 256 ||
7784  !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
7785  !isPowerOf2_32(SrcVT.getScalarSizeInBits()))
7786  return SDValue();
7787  if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
7788  return SDValue();
7789 
7790  unsigned WideNumElts = 128 / EltVT.getSizeInBits();
7791  EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
7792 
7793  SDLoc DL(Op);
7794  SDValue Op1, Op2;
7795  if (SrcSize == 256) {
7796  EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
7797  EVT SplitVT =
7798  SrcVT.getHalfNumVectorElementsVT(*DAG.getContext());
7799  unsigned SplitNumElts = SplitVT.getVectorNumElements();
7800  Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
7801  DAG.getConstant(0, DL, VecIdxTy));
7802  Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
7803  DAG.getConstant(SplitNumElts, DL, VecIdxTy));
7804  }
7805  else {
7806  Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
7807  Op2 = DAG.getUNDEF(WideVT);
7808  }
7809 
7810  // First list the elements we want to keep.
7811  unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
7812  SmallVector<int, 16> ShuffV;
7813  if (Subtarget.isLittleEndian())
7814  for (unsigned i = 0; i < TrgNumElts; ++i)
7815  ShuffV.push_back(i * SizeMult);
7816  else
7817  for (unsigned i = 1; i <= TrgNumElts; ++i)
7818  ShuffV.push_back(i * SizeMult - 1);
7819 
7820  // Populate the remaining elements with undefs.
7821  for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
7822  // ShuffV.push_back(i + WideNumElts);
7823  ShuffV.push_back(WideNumElts + 1);
7824 
7825  Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
7826  Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
7827  return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
7828 }
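The shuffle mask built above keeps one sub-element per source lane (the low part on little-endian, the high part on big-endian) and pads the remaining result lanes with a fixed index into the second shuffle operand. A standalone sketch of the mask construction for a v8i16 to v8i8 truncate; the helper name and the plain std::vector are illustrative, not LLVM code.

#include <cstdio>
#include <vector>

static std::vector<int> truncShuffleMask(unsigned TrgNumElts, unsigned SizeMult,
                                         unsigned WideNumElts, bool LittleEndian) {
  std::vector<int> Mask;
  if (LittleEndian)
    for (unsigned i = 0; i < TrgNumElts; ++i)
      Mask.push_back(i * SizeMult);       // keep the low sub-element
  else
    for (unsigned i = 1; i <= TrgNumElts; ++i)
      Mask.push_back(i * SizeMult - 1);   // keep the high sub-element
  for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
    Mask.push_back(WideNumElts + 1);      // padding lanes, as in the code above
  return Mask;
}

int main() {
  // v8i16 -> v8i8: SizeMult = 2, 16 byte lanes in a 128-bit register.
  for (int M : truncShuffleMask(8, 2, 16, /*LittleEndian=*/false))
    std::printf("%d ", M);
  std::printf("\n");
  return 0;
}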
7829 
7830 /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
7831 /// possible.
7832 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
7833  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
7834  EVT ResVT = Op.getValueType();
7835  EVT CmpVT = Op.getOperand(0).getValueType();
7836  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
7837  SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
7838  SDLoc dl(Op);
7839 
7840  // Without power9-vector, we don't have a native instruction for f128 comparison.
7841  // The following transformation to a libcall is needed for setcc:
7842  // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
7843  if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
7844  SDValue Z = DAG.getSetCC(
7845  dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
7846  LHS, RHS, CC);
7847  SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
7848  return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
7849  }
7850 
7851  // Not FP, or using SPE? Not a fsel.
7852  if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
7853  Subtarget.hasSPE())
7854  return Op;
7855 
7856  SDNodeFlags Flags = Op.getNode()->getFlags();
7857 
7858  // We have xsmaxcdp/xsmincdp which are OK to emit even in the
7859  // presence of infinities.
7860  if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
7861  switch (CC) {
7862  default:
7863  break;
7864  case ISD::SETOGT:
7865  case ISD::SETGT:
7866  return DAG.getNode(PPCISD::XSMAXCDP, dl, Op.getValueType(), LHS, RHS);
7867  case ISD::SETOLT:
7868  case ISD::SETLT:
7869  return DAG.getNode(PPCISD::XSMINCDP, dl, Op.getValueType(), LHS, RHS);
7870  }
7871  }
7872 
7873  // We might be able to do better than this under some circumstances, but in
7874  // general, fsel-based lowering of select is a finite-math-only optimization.
7875  // For more information, see section F.3 of the 2.06 ISA specification.
7876  // With ISA 3.0
7877  if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
7878  (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()))
7879  return Op;
7880 
7881  // If the RHS of the comparison is a 0.0, we don't need to do the
7882  // subtraction at all.
7883  SDValue Sel1;
7884  if (isFloatingPointZero(RHS))
7885  switch (CC) {
7886  default: break; // SETUO etc aren't handled by fsel.
7887  case ISD::SETNE:
7888  std::swap(TV, FV);
7889  LLVM_FALLTHROUGH;
7890  case ISD::SETEQ:
7891  if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
7892  LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7893  Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
7894  if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
7895  Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
7896  return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7897  DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
7898  case ISD::SETULT:
7899  case ISD::SETLT:
7900  std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
7901  LLVM_FALLTHROUGH;
7902  case ISD::SETOGE:
7903  case ISD::SETGE:
7904  if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
7905  LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7906  return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
7907  case ISD::SETUGT:
7908  case ISD::SETGT:
7909  std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
7910  LLVM_FALLTHROUGH;
7911  case ISD::SETOLE:
7912  case ISD::SETLE:
7913  if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
7914  LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7915  return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7916  DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
7917  }
7918 
7919  SDValue Cmp;
7920  switch (CC) {
7921  default: break; // SETUO etc aren't handled by fsel.
7922  case ISD::SETNE:
7923  std::swap(TV, FV);
7924  LLVM_FALLTHROUGH;
7925  case ISD::SETEQ:
7926  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7927  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7928  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7929  Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
7930  if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
7931  Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
7932  return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7933  DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
7934  case ISD::SETULT:
7935  case ISD::SETLT:
7936  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7937  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7938  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7939  return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
7940  case ISD::SETOGE:
7941  case ISD::SETGE:
7942  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7943  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7944  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7945  return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
7946  case ISD::SETUGT:
7947  case ISD::SETGT:
7948  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
7949  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7950  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7951  return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
7952  case ISD::SETOLE:
7953  case ISD::SETLE:
7954  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
7955  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7956  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7957  return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
7958  }
7959  return Op;
7960 }
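The fsel-based path relies on fsel selecting its second operand when the first compares greater than or equal to zero, so a select_cc with setge on (LHS - RHS) maps directly, and the other orderings are reached by swapping operands or negating the comparison value. A scalar model of that semantics; this is not the actual instruction and is only valid under the finite-math assumptions noted above.

#include <cstdio>

// fsel(X, A, B) returns A when X >= 0.0, otherwise B.
static double fsel(double X, double A, double B) { return X >= 0.0 ? A : B; }

int main() {
  double A = 2.0, B = 3.0, TV = 10.0, FV = 20.0;
  // select_cc A, B, TV, FV, setge  ==>  fsel(A - B, TV, FV)
  std::printf("%f\n", fsel(A - B, TV, FV)); // A < B, so the false value 20.0
  return 0;
}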
7961 
7962 static unsigned getPPCStrictOpcode(unsigned Opc) {
7963  switch (Opc) {
7964  default:
7965  llvm_unreachable("No strict version of this opcode!");
7966  case PPCISD::FCTIDZ:
7967  return PPCISD::STRICT_FCTIDZ;
7968  case PPCISD::FCTIWZ:
7969  return PPCISD::STRICT_FCTIWZ;
7970  case PPCISD::FCTIDUZ:
7971  return PPCISD::STRICT_FCTIDUZ;
7972  case PPCISD::FCTIWUZ:
7973  return PPCISD::STRICT_FCTIWUZ;
7974  case PPCISD::FCFID:
7975  return PPCISD::STRICT_FCFID;
7976  case PPCISD::FCFIDU:
7977  return PPCISD::STRICT_FCFIDU;
7978  case PPCISD::FCFIDS:
7979  return PPCISD::STRICT_FCFIDS;
7980  case PPCISD::FCFIDUS:
7981  return PPCISD::STRICT_FCFIDUS;
7982  }
7983 }
7984 
7985 static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
7986  const PPCSubtarget &Subtarget) {
7987  SDLoc dl(Op);
7988  bool IsStrict = Op->isStrictFPOpcode();
7989  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
7990  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
7991 
7992  // TODO: Any other flags to propagate?
7993  SDNodeFlags Flags;
7994  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
7995 
7996  // For strict nodes, source is the second operand.
7997  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
7998  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8000  if (Src.getValueType() == MVT::f32) {
8001  if (IsStrict) {
8002  Src =
8003  DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
8004  DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8005  Chain = Src.getValue(1);
8006  } else
8007  Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8008  }
8009  SDValue Conv;
8010  unsigned Opc = ISD::DELETED_NODE;
8011  switch (Op.getSimpleValueType().SimpleTy) {
8012  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8013  case MVT::i32:
8014  Opc = IsSigned ? PPCISD::FCTIWZ
8015  : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8016  break;
8017  case MVT::i64:
8018  assert((IsSigned || Subtarget.hasFPCVT()) &&
8019  "i64 FP_TO_UINT is supported only with FPCVT");
8020  Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8021  }
8022  if (IsStrict) {
8023  Opc = getPPCStrictOpcode(Opc);
8024  Conv = DAG.getNode(Opc, dl, DAG.getVTList(MVT::f64, MVT::Other),
8025  {Chain, Src}, Flags);
8026  } else {
8027  Conv = DAG.getNode(Opc, dl, MVT::f64, Src);
8028  }
8029  return Conv;
8030 }
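All of the fctiwz/fctidz variants selected in convertFPToInt truncate toward zero regardless of the current rounding mode, which is the same behavior as a C++ float-to-integer static_cast. A one-line illustration with arbitrary values:

#include <cstdio>

int main() {
  std::printf("%d %d\n", static_cast<int>(2.9), static_cast<int>(-2.9)); // 2 -2
  return 0;
}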
8031 
8032 void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8033  SelectionDAG &DAG,
8034  const SDLoc &dl) const {
8035  SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8036  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8037  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8038  bool IsStrict = Op->isStrictFPOpcode();
8039 
8040  // Convert the FP value to an int value through memory.
8041  bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8042  (IsSigned || Subtarget.hasFPCVT());
8043  SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8044  int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8045  MachinePointerInfo MPI =
8046  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
8047 
8048  // Emit a store to the stack slot.
8049  SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8050  Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8051  if (i32Stack) {
8052  MachineFunction &MF = DAG.getMachineFunction();
8053  Alignment = Align(4);
8054  MachineMemOperand *MMO =
8055  MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8056  SDValue Ops[] = { Chain, Tmp, FIPtr };
8057  Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8058  DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8059  } else
8060  Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8061 
8062  // Result is a load from the stack slot. If loading 4 bytes, make sure to
8063  // add in a bias on big endian.
8064  if (Op.getValueType() == MVT::i32 && !i32Stack) {
8065  FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8066  DAG.getConstant(4, dl, FIPtr.getValueType()));
8067  MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
8068  }
8069 
8070  RLI.Chain = Chain;
8071  RLI.Ptr = FIPtr;
8072  RLI.MPI = MPI;
8073  RLI.Alignment = Alignment;
8074 }
8075 
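// Why the +4 bias above exists, as a minimal sketch (the helper below is
// hypothetical and not used by the lowering code): when the whole f64 image of
// the fctiwz/fctidz result is stored to an 8-byte slot on a big-endian
// subtarget, the 32-bit integer lives in the low-order word, i.e. at byte
// offset 4; a little-endian subtarget reads it from offset 0.
static unsigned i32ResultOffsetInF64Slot(bool IsBigEndian) {
  return IsBigEndian ? 4 : 0; // low-order word of the 8-byte image
}
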
8076 /// Custom lowers floating point to integer conversions to use
8077 /// the direct move instructions available in ISA 2.07 to avoid the
8078 /// need for load/store combinations.
8079 SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8080  SelectionDAG &DAG,
8081  const SDLoc &dl) const {
8082  SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8083  SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8084  if (Op->isStrictFPOpcode())
8085  return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8086  else
8087  return Mov;
8088 }
8089 
8090 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8091  const SDLoc &dl) const {
8092  bool IsStrict = Op->isStrictFPOpcode();
8093  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8094  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8095  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8096  EVT SrcVT = Src.getValueType();
8097  EVT DstVT = Op.getValueType();
8098 
8099  // FP to INT conversions are legal for f128.
8100  if (SrcVT == MVT::f128)
8101  return Subtarget.hasP9Vector() ? Op : SDValue();
8102 
8103  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8104  // PPC (the libcall is not available).
8105  if (SrcVT == MVT::ppcf128) {
8106  if (DstVT == MVT::i32) {
8107  // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8108  // set other fast-math flags to FP operations in both strict and
8109  // non-strict cases. (FP_TO_SINT, FSUB)
8110  SDNodeFlags Flags;
8111  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8112 
8113  if (IsSigned) {
8114  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
8115  DAG.getIntPtrConstant(0, dl));
8116  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
8117  DAG.getIntPtrConstant(1, dl));
8118 
8119  // Add the two halves of the long double in round-to-zero mode, and use
8120  // a smaller FP_TO_SINT.
8121  if (IsStrict) {
8122  SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8123  DAG.getVTList(MVT::f64, MVT::Other),
8124  {Op.getOperand(0), Lo, Hi}, Flags);
8125  return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8126  DAG.getVTList(MVT::i32, MVT::Other),
8127  {Res.getValue(1), Res}, Flags);
8128  } else {
8129  SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8130  return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8131  }
8132  } else {
8133  const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8134  APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8135  SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8136  SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8137  if (IsStrict) {
8138  // Sel = Src < 0x80000000
8139  // FltOfs = select Sel, 0.0, 0x80000000
8140  // IntOfs = select Sel, 0, 0x80000000
8141  // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8142  SDValue Chain = Op.getOperand(0);
8143  EVT SetCCVT =
8144  getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8145  EVT DstSetCCVT =
8146  getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8147  SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8148  Chain, true);
8149  Chain = Sel.getValue(1);
8150 
8151  SDValue FltOfs = DAG.getSelect(
8152  dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8153  Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8154 
8155  SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8156  DAG.getVTList(SrcVT, MVT::Other),
8157  {Chain, Src, FltOfs}, Flags);
8158  Chain = Val.getValue(1);
8159  SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8160  DAG.getVTList(DstVT, MVT::Other),
8161  {Chain, Val}, Flags);
8162  Chain = SInt.getValue(1);
8163  SDValue IntOfs = DAG.getSelect(
8164  dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8165  SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8166  return DAG.getMergeValues({Result, Chain}, dl);
8167  } else {
8168  // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8169  // FIXME: generated code sucks.
8170  SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8171  True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8172  True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8173  SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8174  return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8175  }
8176  }
8177  }
8178 
8179  return SDValue();
8180  }
8181 
8182  if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8183  return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8184 
8185  ReuseLoadInfo RLI;
8186  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8187 
8188  return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8189  RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8190 }
8191 
8192 // We're trying to insert a regular store, S, and then a load, L. If the
8193 // incoming value, O, is a load, we might just be able to have our load use the
8194 // address used by O. However, we don't know if anything else will store to
8195 // that address before we can load from it. To prevent this situation, we need
8196 // to insert our load, L, into the chain as a peer of O. To do this, we give L
8197 // the same chain operand as O, we create a token factor from the chain results
8198 // of O and L, and we replace all uses of O's chain result with that token
8199 // factor (see spliceIntoChain below for this last part).
8200 bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8201  ReuseLoadInfo &RLI,
8202  SelectionDAG &DAG,
8203  ISD::LoadExtType ET) const {
8204  // Conservatively skip reusing for constrained FP nodes.
8205  if (Op->isStrictFPOpcode())
8206  return false;
8207 
8208  SDLoc dl(Op);
8209  bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8210  (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8211  if (ET == ISD::NON_EXTLOAD &&
8212  (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8213  isOperationLegalOrCustom(Op.getOpcode(),
8214  Op.getOperand(0).getValueType())) {
8215 
8216  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8217  return true;
8218  }
8219 
8220  LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8221  if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8222  LD->isNonTemporal())
8223  return false;
8224  if (LD->getMemoryVT() != MemVT)
8225  return false;
8226 
8227  // If the result of the load is an illegal type, then we can't build a
8228  // valid chain for reuse since the legalised loads and token factor node that
8229  // ties the legalised loads together uses a different output chain than the
8230  // illegal load.
8231  if (!isTypeLegal(LD->getValueType(0)))
8232  return false;
8233 
8234  RLI.Ptr = LD->getBasePtr();
8235  if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8236  assert(LD->getAddressingMode() == ISD::PRE_INC &&
8237  "Non-pre-inc AM on PPC?");
8238  RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8239  LD->getOffset());
8240  }
8241 
8242  RLI.Chain = LD->getChain();
8243  RLI.MPI = LD->getPointerInfo();
8244  RLI.IsDereferenceable = LD->isDereferenceable();
8245  RLI.IsInvariant = LD->isInvariant();
8246  RLI.Alignment = LD->getAlign();
8247  RLI.AAInfo = LD->getAAInfo();
8248  RLI.Ranges = LD->getRanges();
8249 
8250  RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8251  return true;
8252 }
8253 
8254 // Given the head of the old chain, ResChain, insert a token factor containing
8255 // it and NewResChain, and make users of ResChain now be users of that token
8256 // factor.
8257 // TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
8258 void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
8259  SDValue NewResChain,
8260  SelectionDAG &DAG) const {
8261  if (!ResChain)
8262  return;
8263 
8264  SDLoc dl(NewResChain);
8265 
8266  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ResChain,
8267  NewResChain, DAG.getUNDEF(MVT::Other));
8268  assert(TF.getNode() != NewResChain.getNode() &&
8269  "A new TF really is required here");
8270 
8271  DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
8272  DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
8273 }
8274 
8275 /// Analyze profitability of direct move
8276 /// prefer float load to int load plus direct move
8277 /// when there is no integer use of int load
8278 bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8279  SDNode *Origin = Op.getOperand(0).getNode();
8280  if (Origin->getOpcode() != ISD::LOAD)
8281  return true;
8282 
8283  // If there is no LXSIBZX/LXSIHZX, like Power8,
8284  // prefer direct move if the memory size is 1 or 2 bytes.
8285  MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8286  if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
8287  return true;
8288 
8289  for (SDNode::use_iterator UI = Origin->use_begin(),
8290  UE = Origin->use_end();
8291  UI != UE; ++UI) {
8292 
8293  // Only look at the users of the loaded value.
8294  if (UI.getUse().get().getResNo() != 0)
8295  continue;
8296 
8297  if (UI->getOpcode() != ISD::SINT_TO_FP &&
8298  UI->getOpcode() != ISD::UINT_TO_FP &&
8299  UI->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8300  UI->getOpcode() != ISD::STRICT_UINT_TO_FP)
8301  return true;
8302  }
8303 
8304  return false;
8305 }
8306 
8307 static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8308  const PPCSubtarget &Subtarget,
8309  SDValue Chain = SDValue()) {
8310  bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8311  Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8312  SDLoc dl(Op);
8313 
8314  // TODO: Any other flags to propagate?
8315  SDNodeFlags Flags;
8316  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8317 
8318  // If we have FCFIDS, then use it when converting to single-precision.
8319  // Otherwise, convert to double-precision and then round.
8320  bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8321  unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8322  : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8323  EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8324  if (Op->isStrictFPOpcode()) {
8325  if (!Chain)
8326  Chain = Op.getOperand(0);
8327  return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8328  DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8329  } else
8330  return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8331 }
8332 
8333 /// Custom lowers integer to floating point conversions to use
8334 /// the direct move instructions available in ISA 2.07 to avoid the
8335 /// need for load/store combinations.
8336 SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8337  SelectionDAG &DAG,
8338  const SDLoc &dl) const {
8339  assert((Op.getValueType() == MVT::f32 ||
8340  Op.getValueType() == MVT::f64) &&
8341  "Invalid floating point type as target of conversion");
8342  assert(Subtarget.hasFPCVT() &&
8343  "Int to FP conversions with direct moves require FPCVT");
8344  SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8345  bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8346  bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8347  Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8348  unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8349  SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8350  return convertIntToFP(Op, Mov, DAG, Subtarget);
8351 }
8352 
8353 static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8354 
8355  EVT VecVT = Vec.getValueType();
8356  assert(VecVT.isVector() && "Expected a vector type.");
8357  assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8358 
8359  EVT EltVT = VecVT.getVectorElementType();
8360  unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8361  EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8362 
8363  unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8364  SmallVector<SDValue, 16> Ops(NumConcat);
8365  Ops[0] = Vec;
8366  SDValue UndefVec = DAG.getUNDEF(VecVT);
8367  for (unsigned i = 1; i < NumConcat; ++i)
8368  Ops[i] = UndefVec;
8369 
8370  return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8371 }
8372 
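// Worked example of widenVec (a sketch; the arithmetic mirrors the code
// above): widening a v2i32 input concatenates it with one undef v2i32,
// producing a v4i32 whose lanes 0-1 carry the original elements.
static unsigned numConcatOpsForWiden(unsigned EltBits, unsigned NumElts) {
  unsigned WideNumElts = 128 / EltBits; // lanes in the 128-bit result
  return WideNumElts / NumElts;         // operands of the CONCAT_VECTORS
}
// numConcatOpsForWiden(32, 2) == 2: the input itself plus one undef vector.
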
8373 SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8374  const SDLoc &dl) const {
8375  bool IsStrict = Op->isStrictFPOpcode();
8376  unsigned Opc = Op.getOpcode();
8377  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8378  assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
8379  Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
8380  "Unexpected conversion type");
8381  assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8382  "Supports conversions to v2f64/v4f32 only.");
8383 
8384  // TODO: Any other flags to propagate?
8385  SDNodeFlags Flags;
8386  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8387 
8388  bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8389  bool FourEltRes = Op.getValueType() == MVT::v4f32;
8390 
8391  SDValue Wide = widenVec(DAG, Src, dl);
8392  EVT WideVT = Wide.getValueType();
8393  unsigned WideNumElts = WideVT.getVectorNumElements();
8394  MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8395 
8396  SmallVector<int, 16> ShuffV;
8397  for (unsigned i = 0; i < WideNumElts; ++i)
8398  ShuffV.push_back(i + WideNumElts);
8399 
8400  int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8401  int SaveElts = FourEltRes ? 4 : 2;
8402  if (Subtarget.isLittleEndian())
8403  for (int i = 0; i < SaveElts; i++)
8404  ShuffV[i * Stride] = i;
8405  else
8406  for (int i = 1; i <= SaveElts; i++)
8407  ShuffV[i * Stride - 1] = i - 1;
8408 
8409  SDValue ShuffleSrc2 =
8410  SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8411  SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8412 
8413  SDValue Extend;
8414  if (SignedConv) {
8415  Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8416  EVT ExtVT = Src.getValueType();
8417  if (Subtarget.hasP9Altivec())
8418  ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8419  IntermediateVT.getVectorNumElements());
8420 
8421  Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8422  DAG.getValueType(ExtVT));
8423  } else
8424  Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8425 
8426  if (IsStrict)
8427  return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8428  {Op.getOperand(0), Extend}, Flags);
8429 
8430  return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8431 }
8432 
8433 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8434  SelectionDAG &DAG) const {
8435  SDLoc dl(Op);
8436  bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8437  Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8438  bool IsStrict = Op->isStrictFPOpcode();
8439  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8440  SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8441 
8442  // TODO: Any other flags to propagate?
8443  SDNodeFlags Flags;
8444  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8445 
8446  EVT InVT = Src.getValueType();
8447  EVT OutVT = Op.getValueType();
8448  if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8449  isOperationCustom(Op.getOpcode(), InVT))
8450  return LowerINT_TO_FPVector(Op, DAG, dl);
8451 
8452  // Conversions to f128 are legal.
8453  if (Op.getValueType() == MVT::f128)
8454  return Subtarget.hasP9Vector() ? Op : SDValue();
8455 
8456  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8457  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8458  return SDValue();
8459 
8460  if (Src.getValueType() == MVT::i1) {
8461  SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8462  DAG.getConstantFP(1.0, dl, Op.getValueType()),
8463  DAG.getConstantFP(0.0, dl, Op.getValueType()));
8464  if (IsStrict)
8465  return DAG.getMergeValues({Sel, Chain}, dl);
8466  else
8467  return Sel;
8468  }
8469 
8470  // If we have direct moves, we can do the entire conversion and skip the
8471  // store/load; however, without FPCVT we can't do most conversions.
8472  if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8473  Subtarget.isPPC64() && Subtarget.hasFPCVT())
8474  return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8475 
8476  assert((IsSigned || Subtarget.hasFPCVT()) &&
8477  "UINT_TO_FP is supported only with FPCVT");
8478 
8479  if (Src.getValueType() == MVT::i64) {
8480  SDValue SINT = Src;
8481  // When converting to single-precision, we actually need to convert
8482  // to double-precision first and then round to single-precision.
8483  // To avoid double-rounding effects during that operation, we have
8484  // to prepare the input operand. Bits that might be truncated when
8485  // converting to double-precision are replaced by a bit that won't
8486  // be lost at this stage, but is below the single-precision rounding
8487  // position.
8488  //
8489  // However, if -enable-unsafe-fp-math is in effect, accept double
8490  // rounding to avoid the extra overhead.
8491  if (Op.getValueType() == MVT::f32 &&
8492  !Subtarget.hasFPCVT() &&
8493  !DAG.getTarget().Options.UnsafeFPMath) {
8494 
8495  // Twiddle input to make sure the low 11 bits are zero. (If this
8496  // is the case, we are guaranteed the value will fit into the 53 bit
8497  // mantissa of an IEEE double-precision value without rounding.)
8498  // If any of those low 11 bits were not zero originally, make sure
8499  // bit 12 (value 2048) is set instead, so that the final rounding
8500  // to single-precision gets the correct result.
8501  SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8502  SINT, DAG.getConstant(2047, dl, MVT::i64));
8503  Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8504  Round, DAG.getConstant(2047, dl, MVT::i64));
8505  Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8506  Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8507  Round, DAG.getConstant(-2048, dl, MVT::i64));
8508 
8509  // However, we cannot use that value unconditionally: if the magnitude
8510  // of the input value is small, the bit-twiddling we did above might
8511  // end up visibly changing the output. Fortunately, in that case, we
8512  // don't need to twiddle bits since the original input will convert
8513  // exactly to double-precision floating-point already. Therefore,
8514  // construct a conditional to use the original value if the top 11
8515  // bits are all sign-bit copies, and use the rounded value computed
8516  // above otherwise.
8517  SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8518  SINT, DAG.getConstant(53, dl, MVT::i32));
8519  Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8520  Cond, DAG.getConstant(1, dl, MVT::i64));
8521  Cond = DAG.getSetCC(
8522  dl,
8523  getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8524  Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8525 
8526  SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8527  }
8528 
8529  ReuseLoadInfo RLI;
8530  SDValue Bits;
8531 
8532  MachineFunction &MF = DAG.getMachineFunction();
8533  if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8534  Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8535  RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8536  spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8537  } else if (Subtarget.hasLFIWAX() &&
8538  canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8539  MachineMemOperand *MMO =
8540  MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8541  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8542  SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8543  Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8544  DAG.getVTList(MVT::f64, MVT::Other),
8545  Ops, MVT::i32, MMO);
8546  spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8547  } else if (Subtarget.hasFPCVT() &&
8548  canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8549  MachineMemOperand *MMO =
8550  MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8551  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8552  SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8553  Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8554  DAG.getVTList(MVT::f64, MVT::Other),
8555  Ops, MVT::i32, MMO);
8556  spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8557  } else if (((Subtarget.hasLFIWAX() &&
8558  SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8559  (Subtarget.hasFPCVT() &&
8560  SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8561  SINT.getOperand(0).getValueType() == MVT::i32) {
8562  MachineFrameInfo &MFI = MF.getFrameInfo();
8563  EVT PtrVT = getPointerTy(DAG.getDataLayout());
8564 
8565  int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8566  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8567 
8568  SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8569  MachinePointerInfo::getFixedStack(
8570  DAG.getMachineFunction(), FrameIdx));
8571  Chain = Store;
8572 
8573  assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8574  "Expected an i32 store");
8575 
8576  RLI.Ptr = FIdx;
8577  RLI.Chain = Chain;
8578  RLI.MPI =
8579  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8580  RLI.Alignment = Align(4);
8581 
8582  MachineMemOperand *MMO =
8583  MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8584  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8585  SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8586  Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
8587  PPCISD::LFIWZX : PPCISD::LFIWAX,
8588  dl, DAG.getVTList(MVT::f64, MVT::Other),
8589  Ops, MVT::i32, MMO);
8590  Chain = Bits.getValue(1);
8591  } else
8592  Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8593 
8594  SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8595  if (IsStrict)
8596  Chain = FP.getValue(1);
8597 
8598  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8599  if (IsStrict)
8600  FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8601  DAG.getVTList(MVT::f32, MVT::Other),
8602  {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8603  else
8604  FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8605  DAG.getIntPtrConstant(0, dl));
8606  }
8607  return FP;
8608  }
8609 
8610  assert(Src.getValueType() == MVT::i32 &&
8611  "Unhandled INT_TO_FP type in custom expander!");
8612  // Since we only generate this in 64-bit mode, we can take advantage of
8613  // 64-bit registers. In particular, sign extend the input value into the
8614  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
8615  // then lfd it and fcfid it.
8616  MachineFunction &MF = DAG.getMachineFunction();
8617  MachineFrameInfo &MFI = MF.getFrameInfo();
8618  EVT PtrVT = getPointerTy(MF.getDataLayout());
8619 
8620  SDValue Ld;
8621  if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8622  ReuseLoadInfo RLI;
8623  bool ReusingLoad;
8624  if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
8625  int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8626  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8627 
8628  SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
8629  MachinePointerInfo::getFixedStack(
8630  DAG.getMachineFunction(), FrameIdx));
8631  Chain = Store;
8632 
8633  assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8634  "Expected an i32 store");
8635 
8636  RLI.Ptr = FIdx;
8637  RLI.Chain = Chain;
8638  RLI.MPI =
8639  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8640  RLI.Alignment = Align(4);
8641  }
8642 
8643  MachineMemOperand *MMO =
8644  MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8645  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8646  SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8647  Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
8648  DAG.getVTList(MVT::f64, MVT::Other), Ops,
8649  MVT::i32, MMO);
8650  Chain = Ld.getValue(1);
8651  if (ReusingLoad)
8652  spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
8653  } else {
8654  assert(Subtarget.isPPC64() &&
8655  "i32->FP without LFIWAX supported only on PPC64");
8656 
8657  int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
8658  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8659 
8660  SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
8661 
8662  // STD the extended value into the stack slot.
8663  SDValue Store = DAG.getStore(
8664  Chain, dl, Ext64, FIdx,
8665  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8666  Chain = Store;
8667 
8668  // Load the value as a double.
8669  Ld = DAG.getLoad(
8670  MVT::f64, dl, Chain, FIdx,
8671  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8672  Chain = Ld.getValue(1);
8673  }
8674 
8675  // FCFID it and return it.
8676  SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
8677  if (IsStrict)
8678  Chain = FP.getValue(1);
8679  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8680  if (IsStrict)
8681  FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8682  DAG.getVTList(MVT::f32, MVT::Other),
8683  {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8684  else
8685  FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8686  DAG.getIntPtrConstant(0, dl));
8687  }
8688  return FP;
8689 }
8690 
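// The low-11-bit "twiddle" used above for i64 -> f32 without FPCVT, as plain
// integer code (a sketch; 'stickyRound' is hypothetical): if any of the low 11
// bits of X are set, they are cleared and the bit with value 2048 is set
// instead, so the later f64 -> f32 rounding still sees an inexact value.
static int64_t stickyRound(int64_t X) {
  int64_t R = X & 2047;   // low 11 bits
  R = R + 2047;           // carries into the 2048 bit iff any low bit was set
  R = R | X;
  return R & ~2047LL;     // clear the low 11 bits
}
// stickyRound(0x1000) == 0x1000 (already exact), stickyRound(0x1001) == 0x1800.
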
8691 SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
8692  SelectionDAG &DAG) const {
8693  SDLoc dl(Op);
8694  /*
8695  The rounding mode is in bits 30:31 of FPSCR, and has the following
8696  settings:
8697  00 Round to nearest
8698  01 Round to 0
8699  10 Round to +inf
8700  11 Round to -inf
8701 
8702  FLT_ROUNDS, on the other hand, expects the following:
8703  -1 Undefined
8704  0 Round to 0
8705  1 Round to nearest
8706  2 Round to +inf
8707  3 Round to -inf
8708 
8709  To perform the conversion, we do:
8710  ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
8711  */
8712 
8713  MachineFunction &MF = DAG.getMachineFunction();
8714  EVT VT = Op.getValueType();
8715  EVT PtrVT = getPointerTy(MF.getDataLayout());
8716 
8717  // Save FP Control Word to register
8718  SDValue Chain = Op.getOperand(0);
8719  SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
8720  Chain = MFFS.getValue(1);
8721 
8722  SDValue CWD;
8723  if (isTypeLegal(MVT::i64)) {
8724  CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
8725  DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
8726  } else {
8727  // Save FP register to stack slot
8728  int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
8729  SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
8730  Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
8731 
8732  // Load FP Control Word from low 32 bits of stack slot.
8734  "Stack slot adjustment is valid only on big endian subtargets!");
8735  SDValue Four = DAG.getConstant(4, dl, PtrVT);
8736  SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
8737  CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
8738  Chain = CWD.getValue(1);
8739  }
8740 
8741  // Transform as necessary
8742  SDValue CWD1 =
8743  DAG.getNode(ISD::AND, dl, MVT::i32,
8744  CWD, DAG.getConstant(3, dl, MVT::i32));
8745  SDValue CWD2 =
8746  DAG.getNode(ISD::SRL, dl, MVT::i32,
8747  DAG.getNode(ISD::AND, dl, MVT::i32,
8748  DAG.getNode(ISD::XOR, dl, MVT::i32,
8749  CWD, DAG.getConstant(3, dl, MVT::i32)),
8750  DAG.getConstant(3, dl, MVT::i32)),
8751  DAG.getConstant(1, dl, MVT::i32));
8752 
8753  SDValue RetVal =
8754  DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
8755 
8756  RetVal =
8757  DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
8758  dl, VT, RetVal);
8759 
8760  return DAG.getMergeValues({RetVal, Chain}, dl);
8761 }
8762 
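// Checking the FPSCR -> FLT_ROUNDS mapping above on all four encodings of the
// two-bit rounding-mode field (a sketch; 'RN' is hypothetical shorthand for
// those two bits):
static int fltRoundsFromRN(unsigned RN) {
  return int((RN & 3) ^ ((~RN & 3) >> 1));
}
// fltRoundsFromRN(0) == 1 (nearest), fltRoundsFromRN(1) == 0 (toward zero),
// fltRoundsFromRN(2) == 2 (toward +inf), fltRoundsFromRN(3) == 3 (toward -inf).
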
8763 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8764  EVT VT = Op.getValueType();
8765  unsigned BitWidth = VT.getSizeInBits();
8766  SDLoc dl(Op);
8767  assert(Op.getNumOperands() == 3 &&
8768  VT == Op.getOperand(1).getValueType() &&
8769  "Unexpected SHL!");
8770 
8771  // Expand into a bunch of logical ops. Note that these ops
8772  // depend on the PPC behavior for oversized shift amounts.
8773  SDValue Lo = Op.getOperand(0);
8774  SDValue Hi = Op.getOperand(1);
8775  SDValue Amt = Op.getOperand(2);
8776  EVT AmtVT = Amt.getValueType();
8777 
8778  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8779  DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8780  SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
8781  SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
8782  SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
8783  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8784  DAG.getConstant(-BitWidth, dl, AmtVT));
8785  SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
8786  SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8787  SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
8788  SDValue OutOps[] = { OutLo, OutHi };
8789  return DAG.getMergeValues(OutOps, dl);
8790 }
8791 
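// A scalar model of the SHL_PARTS expansion above (a sketch; the helpers model
// the PPCISD::SHL/SRL semantics the comment relies on, where a shift amount of
// the bit width or more yields zero). For Amt == 0 both "cross" terms vanish,
// and for Amt >= 32 only the Lo << (Amt - 32) term survives in the high half.
static uint32_t ppcShl(uint32_t V, uint32_t Amt) { return Amt >= 32 ? 0 : V << Amt; }
static uint32_t ppcSrl(uint32_t V, uint32_t Amt) { return Amt >= 32 ? 0 : V >> Amt; }
static void shlParts32(uint32_t Lo, uint32_t Hi, uint32_t Amt, uint32_t &OutLo,
                       uint32_t &OutHi) {
  OutHi = ppcShl(Hi, Amt) | ppcSrl(Lo, 32 - Amt) | ppcShl(Lo, Amt - 32);
  OutLo = ppcShl(Lo, Amt);
}
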
8792 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8793  EVT VT = Op.getValueType();
8794  SDLoc dl(Op);
8795  unsigned BitWidth = VT.getSizeInBits();
8796  assert(Op.getNumOperands() == 3 &&
8797  VT == Op.getOperand(1).getValueType() &&
8798  "Unexpected SRL!");
8799 
8800  // Expand into a bunch of logical ops. Note that these ops
8801  // depend on the PPC behavior for oversized shift amounts.
8802  SDValue Lo = Op.getOperand(0);
8803  SDValue Hi = Op.getOperand(1);
8804  SDValue Amt = Op.getOperand(2);
8805  EVT AmtVT = Amt.getValueType();
8806 
8807  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8808  DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8809  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8810  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8811  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8812  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8813  DAG.getConstant(-BitWidth, dl, AmtVT));
8814  SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
8815  SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8816  SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
8817  SDValue OutOps[] = { OutLo, OutHi };
8818  return DAG.getMergeValues(OutOps, dl);
8819 }
8820 
8821 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
8822  SDLoc dl(Op);
8823  EVT VT = Op.getValueType();
8824  unsigned BitWidth = VT.getSizeInBits();
8825  assert(Op.getNumOperands() == 3 &&
8826  VT == Op.getOperand(1).getValueType() &&
8827  "Unexpected SRA!");
8828 
8829  // Expand into a bunch of logical ops, followed by a select_cc.
8830  SDValue Lo = Op.getOperand(0);
8831  SDValue Hi = Op.getOperand(1);
8832  SDValue Amt = Op.getOperand(2);
8833  EVT AmtVT = Amt.getValueType();
8834 
8835  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8836  DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8837  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8838  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8839  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8840  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8841  DAG.getConstant(-BitWidth, dl, AmtVT));
8842  SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
8843  SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
8844  SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
8845  Tmp4, Tmp6, ISD::SETLE);
8846  SDValue OutOps[] = { OutLo, OutHi };
8847  return DAG.getMergeValues(OutOps, dl);
8848 }
8849 
8850 SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
8851  SelectionDAG &DAG) const {
8852  SDLoc dl(Op);
8853  EVT VT = Op.getValueType();
8854  unsigned BitWidth = VT.getSizeInBits();
8855 
8856  bool IsFSHL = Op.getOpcode() == ISD::FSHL;
8857  SDValue X = Op.getOperand(0);
8858  SDValue Y = Op.getOperand(1);
8859  SDValue Z = Op.getOperand(2);
8860  EVT AmtVT = Z.getValueType();
8861 
8862  // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
8863  // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
8864  // This is simpler than TargetLowering::expandFunnelShift because we can rely
8865  // on PowerPC shift by BW being well defined.
8866  Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
8867  DAG.getConstant(BitWidth - 1, dl, AmtVT));
8868  SDValue SubZ =
8869  DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
8870  X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
8871  Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
8872  return DAG.getNode(ISD::OR, dl, VT, X, Y);
8873 }
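
// Scalar check of the fshl formula above (a sketch): once Z is reduced modulo
// the bit width, the single remaining corner case is Z == 0, where the
// Y >> (BW - Z) term becomes a shift by BW and, per the PPC semantics relied
// on above, contributes zero rather than being undefined.
static uint32_t fshl32(uint32_t X, uint32_t Y, uint32_t Z) {
  Z &= 31;                                          // Z % BW
  uint32_t HiPart = X << Z;                         // Z < 32 after masking
  uint32_t LoPart = (32 - Z) >= 32 ? 0 : Y >> (32 - Z);
  return HiPart | LoPart;                           // == X when Z % 32 == 0
}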
8874 
8875 //===----------------------------------------------------------------------===//
8876 // Vector related lowering.
8877 //
8878 
8879 /// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
8880 /// element size of SplatSize. Cast the result to VT.
8881 static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
8882  SelectionDAG &DAG, const SDLoc &dl) {
8883  static const MVT VTys[] = { // canonical VT to use for each size.
8884  MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
8885  };
8886 
8887  EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
8888 
8889  // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
8890  if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
8891  SplatSize = 1;
8892  Val = 0xFF;
8893  }
8894 
8895  EVT CanonicalVT = VTys[SplatSize-1];
8896 
8897  // Build a canonical splat for this value.
8898  return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
8899 }
8900 
8901 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the
8902 /// specified intrinsic ID.
8903 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
8904  const SDLoc &dl, EVT DestVT = MVT::Other) {
8905  if (DestVT == MVT::Other) DestVT = Op.getValueType();
8906  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8907  DAG.getConstant(IID, dl, MVT::i32), Op);
8908 }
8909 
8910 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
8911 /// specified intrinsic ID.
8912 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
8913  SelectionDAG &DAG, const SDLoc &dl,
8914  EVT DestVT = MVT::Other) {
8915  if (DestVT == MVT::Other) DestVT = LHS.getValueType();
8916  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8917  DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
8918 }
8919 
8920 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
8921 /// specified intrinsic ID.
8922 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
8923  SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
8924  EVT DestVT = MVT::Other) {
8925  if (DestVT == MVT::Other) DestVT = Op0.getValueType();
8926  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8927  DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
8928 }
8929 
8930 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
8931 /// amount. The result has the specified value type.
8932 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
8933  SelectionDAG &DAG, const SDLoc &dl) {
8934  // Force LHS/RHS to be the right type.
8935  LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
8936  RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
8937 
8938  int Ops[16];
8939  for (unsigned i = 0; i != 16; ++i)
8940  Ops[i] = i + Amt;
8941  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
8942  return DAG.getNode(ISD::BITCAST, dl, VT, T);
8943 }
8944 
8945 /// Do we have an efficient pattern in a .td file for this node?
8946 ///
8947 /// \param V - pointer to the BuildVectorSDNode being matched
8948 /// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
8949 ///
8950 /// There are some patterns where it is beneficial to keep a BUILD_VECTOR
8951 /// node as a BUILD_VECTOR node rather than expanding it. The patterns where
8952 /// the opposite is true (expansion is beneficial) are:
8953 /// - The node builds a vector out of integers that are not 32 or 64-bits
8954 /// - The node builds a vector out of constants
8955 /// - The node is a "load-and-splat"
8956 /// In all other cases, we will choose to keep the BUILD_VECTOR.
8957 static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
8958  bool HasDirectMove,
8959  bool HasP8Vector) {
8960  EVT VecVT = V->getValueType(0);
8961  bool RightType = VecVT == MVT::v2f64 ||
8962  (HasP8Vector && VecVT == MVT::v4f32) ||
8963  (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
8964  if (!RightType)
8965  return false;
8966 
8967  bool IsSplat = true;
8968  bool IsLoad = false;
8969  SDValue Op0 = V->getOperand(0);
8970 
8971  // This function is called in a block that confirms the node is not a constant
8972  // splat. So a constant BUILD_VECTOR here means the vector is built out of
8973  // different constants.
8974  if (V->isConstant())
8975  return false;
8976  for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
8977  if (V->getOperand(i).isUndef())
8978  return false;
8979  // We want to expand nodes that represent load-and-splat even if the
8980  // loaded value is a floating point truncation or conversion to int.
8981  if (V->getOperand(i).getOpcode() == ISD::LOAD ||
8982  (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
8983  V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
8984  (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
8985  V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
8986  (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
8987  V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
8988  IsLoad = true;
8989  // If the operands are different or the input is not a load and has more
8990  // uses than just this BV node, then it isn't a splat.
8991  if (V->getOperand(i) != Op0 ||
8992  (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
8993  IsSplat = false;
8994  }
8995  return !(IsSplat && IsLoad);
8996 }
8997 
8998 // Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
8999 SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9000 
9001  SDLoc dl(Op);
9002  SDValue Op0 = Op->getOperand(0);
9003 
9004  if ((Op.getValueType() != MVT::f128) ||
9005  (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9006  (Op0.getOperand(0).getValueType() != MVT::i64) ||
9007  (Op0.getOperand(1).getValueType() != MVT::i64))
9008  return SDValue();
9009 
9010  return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
9011  Op0.getOperand(1));
9012 }
9013 
9014 static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9015  const SDValue *InputLoad = &Op;
9016  if (InputLoad->getOpcode() == ISD::BITCAST)
9017  InputLoad = &InputLoad->getOperand(0);
9018  if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9019  InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9020  IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9021  InputLoad = &InputLoad->getOperand(0);
9022  }
9023  if (InputLoad->getOpcode() != ISD::LOAD)
9024  return nullptr;
9025  LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9026  return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9027 }
9028 
9029 // Convert the argument APFloat to a single precision APFloat if there is no
9030 // loss in information during the conversion to single precision APFloat and the
9031 // resulting number is not a denormal number. Return true if successful.
9032 bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9033  APFloat APFloatToConvert = ArgAPFloat;
9034  bool LosesInfo = true;
9035  APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9036  &LosesInfo);
9037  bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9038  if (Success)
9039  ArgAPFloat = APFloatToConvert;
9040  return Success;
9041 }
9042 
9043 // Bitcast the argument APInt to a double and convert it to a single precision
9044 // APFloat, bitcast the APFloat to an APInt and assign it to the original
9045 // argument if there is no loss in information during the conversion from
9046 // double to single precision APFloat and the resulting number is not a denormal
9047 // number. Return true if successful.
9048 bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9049  double DpValue = ArgAPInt.bitsToDouble();
9050  APFloat APFloatDp(DpValue);
9051  bool Success = convertToNonDenormSingle(APFloatDp);
9052  if (Success)
9053  ArgAPInt = APFloatDp.bitcastToAPInt();
9054  return Success;
9055 }
9056 
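// The same check in self-contained form (a sketch, not the APFloat-based code
// above; 1.17549435e-38f stands in for FLT_MIN): a double is a candidate for
// XXSPLTIDP iff it round-trips through single precision exactly and the single
// value is not denormal.
static bool fitsInNonDenormSingle(double D) {
  float F = static_cast<float>(D);              // round-to-nearest conversion
  bool Lossless = static_cast<double>(F) == D;  // analogue of !LosesInfo
  bool Denormal = F != 0.0f && F < 1.17549435e-38f && F > -1.17549435e-38f;
  return Lossless && !Denormal;
}
// fitsInNonDenormSingle(1.0) holds; fitsInNonDenormSingle(0.1) does not, since
// 0.1 is not exactly representable in single precision.
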
9057 // Nondestructive check for convertToNonDenormSingle.
9058 bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
9059  // Only convert if it loses info, since XXSPLTIDP should
9060  // handle the other case.
9061  APFloat APFloatToConvert = ArgAPFloat;
9062  bool LosesInfo = true;
9063  APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9064  &LosesInfo);
9065 
9066  return (!LosesInfo && !APFloatToConvert.isDenormal());
9067 }
9068 
9069 // If this is a case we can't handle, return null and let the default
9070 // expansion code take care of it. If we CAN select this case, and if it
9071 // selects to a single instruction, return Op. Otherwise, if we can codegen
9072 // this case more efficiently than a constant pool load, lower it to the
9073 // sequence of ops that should be used.
9074 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9075  SelectionDAG &DAG) const {
9076  SDLoc dl(Op);
9077  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9078  assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9079 
9080  // Check if this is a splat of a constant value.
9081  APInt APSplatBits, APSplatUndef;
9082  unsigned SplatBitSize;
9083  bool HasAnyUndefs;
9084  bool BVNIsConstantSplat =
9085  BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9086  HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9087 
9088  // If it is a splat of a double, check if we can shrink it to a 32 bit
9089  // non-denormal float which when converted back to double gives us the same
9090  // double. This is to exploit the XXSPLTIDP instruction.
9091  // If we lose precision, we use XXSPLTI32DX.
9092  if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9093  Subtarget.hasPrefixInstrs()) {
9094  // Check the type first to short-circuit so we don't modify APSplatBits if
9095  // this block isn't executed.
9096  if ((Op->getValueType(0) == MVT::v2f64) &&
9097  convertToNonDenormSingle(APSplatBits)) {
9098  SDValue SplatNode = DAG.getNode(
9099  PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9100  DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9101  return DAG.getBitcast(Op.getValueType(), SplatNode);
9102  } else {
9103  // We may lose precision, so we have to use XXSPLTI32DX.
9104 
9105  uint32_t Hi =
9106  (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32);
9107  uint32_t Lo =
9108  (uint32_t)(APSplatBits.getZExtValue() & 0xFFFFFFFF);
9109  SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
9110 
9111  if (!Hi || !Lo)
9112  // If either load is 0, then we should generate XXLXOR to set to 0.
9113  SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
9114 
9115  if (Hi)
9116  SplatNode = DAG.getNode(
9117  PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9118  DAG.getTargetConstant(0, dl, MVT::i32),
9119  DAG.getTargetConstant(Hi, dl, MVT::i32));
9120 
9121  if (Lo)
9122  SplatNode =
9123  DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9124  DAG.getTargetConstant(1, dl, MVT::i32),
9125  DAG.getTargetConstant(Lo, dl, MVT::i32));
9126 
9127  return DAG.getBitcast(Op.getValueType(), SplatNode);
9128  }
9129  }
9130 
9131  if (!BVNIsConstantSplat || SplatBitSize > 32) {
9132 
9133  bool IsPermutedLoad = false;
9134  const SDValue *InputLoad =
9135  getNormalLoadInput(Op.getOperand(0), IsPermutedLoad);
9136  // Handle load-and-splat patterns as we have instructions that will do this
9137  // in one go.
9138  if (InputLoad && DAG.isSplatValue(Op, true)) {
9139  LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9140 
9141  // We have handling for 4 and 8 byte elements.
9142  unsigned ElementSize = LD->getMemoryVT().getScalarSizeInBits();
9143 
9144  // Checking for a single use of this load, we have to check for vector
9145  // width (128 bits) / ElementSize uses (since each operand of the
9146  // BUILD_VECTOR is a separate use of the value).
9147  unsigned NumUsesOfInputLD = 128 / ElementSize;
9148  for (SDValue BVInOp : Op->ops())
9149  if (BVInOp.isUndef())
9150  NumUsesOfInputLD--;
9151  assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9152  if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9153  ((Subtarget.hasVSX() && ElementSize == 64) ||
9154  (Subtarget.hasP9Vector() && ElementSize == 32))) {
9155  SDValue Ops[] = {
9156  LD->getChain(), // Chain
9157  LD->getBasePtr(), // Ptr
9158  DAG.getValueType(Op.getValueType()) // VT
9159  };
9160  SDValue LdSplt = DAG.getMemIntrinsicNode(
9161  PPCISD::LD_SPLAT, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
9162  Ops, LD->getMemoryVT(), LD->getMemOperand());
9163  // Replace all uses of the output chain of the original load with the
9164  // output chain of the new load.
9165  DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9166  LdSplt.getValue(1));
9167  return LdSplt;
9168  }
9169  }
9170 
9171  // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to
9172  // 32-bits can be lowered to VSX instructions under certain conditions.
9173  // Without VSX, there is no pattern more efficient than expanding the node.
9174  if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9175  haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9176  Subtarget.hasP8Vector()))
9177  return Op;
9178  return SDValue();
9179  }
9180 
9181  uint64_t SplatBits = APSplatBits.getZExtValue();
9182  uint64_t SplatUndef = APSplatUndef.getZExtValue();
9183  unsigned SplatSize = SplatBitSize / 8;
9184 
9185  // First, handle single instruction cases.
9186 
9187  // All zeros?
9188  if (SplatBits == 0) {
9189  // Canonicalize all zero vectors to be v4i32.
9190  if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9191  SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9192  Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9193  }
9194  return Op;
9195  }
9196 
9197  // We have XXSPLTIW for constant splats four bytes wide.
9198  // Given vector length is a multiple of 4, 2-byte splats can be replaced
9199  // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9200  // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9201  // turned into a 4-byte splat of 0xABABABAB.
9202  if (Subtarget.hasPrefixInstrs() && SplatSize == 2)
9203  return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9204  Op.getValueType(), DAG, dl);
9205 
9206  if (Subtarget.hasPrefixInstrs() && SplatSize == 4)
9207  return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9208  dl);
9209 
9210  // We have XXSPLTIB for constant splats one byte wide.
9211  if (Subtarget.hasP9Vector() && SplatSize == 1)
9212  return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9213  dl);
9214 
9215  // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9216  int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
9217  (32-SplatBitSize));
9218  if (SextVal >= -16 && SextVal <= 15)
9219  return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
9220  dl);
9221 
9222  // Two instruction sequences.
9223 
9224  // If this value is in the range [-32,30] and is even, use:
9225  // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9226  // If this value is in the range [17,31] and is odd, use:
9227  // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9228  // If this value is in the range [-31,-17] and is odd, use:
9229  // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9230  // Note the last two are three-instruction sequences.
9231  if (SextVal >= -32 && SextVal <= 31) {
9232  // To avoid having these optimizations undone by constant folding,
9233  // we convert to a pseudo that will be expanded later into one of
9234  // the above forms.
9235  SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
9236  EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9237  (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9238  SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9239  SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9240  if (VT == Op.getValueType())
9241  return RetVal;
9242  else
9243  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9244  }
9245 
9246  // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9247  // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9248  // for fneg/fabs.
9249  if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9250  // Make -1 and vspltisw -1:
9251  SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9252 
9253  // Make the VSLW intrinsic, computing 0x8000_0000.
9254  SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9255  OnesV, DAG, dl);
9256 
9257  // xor by OnesV to invert it.
9258  Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9259  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9260  }
9261 
9262  // Check to see if this is a wide variety of vsplti*, binop self cases.
9263  static const signed char SplatCsts[] = {
9264  -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9265  -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9266  };
9267 
9268  for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
9269  // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9270  // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1'
9271  int i = SplatCsts[idx];
9272 
9273  // Figure out what shift amount will be used by altivec if shifted by i in
9274  // this splat size.
9275  unsigned TypeShiftAmt = i & (SplatBitSize-1);
9276 
9277  // vsplti + shl self.
9278  if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9279  SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9280  static const unsigned IIDs[] = { // Intrinsic to use for each size.
9281  Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9282  Intrinsic::ppc_altivec_vslw
9283  };
9284  Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9285  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9286  }
9287 
9288  // vsplti + srl self.
9289  if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9290  SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9291  static const unsigned IIDs[] = { // Intrinsic to use for each size.
9292  Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9293  Intrinsic::ppc_altivec_vsrw
9294  };
9295  Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9296  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9297  }
9298 
9299  // vsplti + rol self.
9300  if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9301  ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9302  SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9303  static const unsigned IIDs[] = { // Intrinsic to use for each size.
9304  Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9305  Intrinsic::ppc_altivec_vrlw
9306  };
9307  Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9308  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9309  }
9310 
9311  // t = vsplti c, result = vsldoi t, t, 1
9312  if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9313  SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9314  unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9315  return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9316  }
9317  // t = vsplti c, result = vsldoi t, t, 2
9318  if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9319  SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9320  unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9321  return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9322  }
9323  // t = vsplti c, result = vsldoi t, t, 3
9324  if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9325  SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9326  unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9327  return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9328  }
9329  }
9330 
9331  return SDValue();
9332 }
9333 
9334 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9335 /// the specified operations to build the shuffle.
9336 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9337  SDValue RHS, SelectionDAG &DAG,
9338  const SDLoc &dl) {
9339  unsigned OpNum = (PFEntry >> 26) & 0x0F;
9340  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9341  unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
9342 
9343  enum {
9344  OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9345  OP_VMRGHW,
9346  OP_VMRGLW,
9347  OP_VSPLTISW0,
9348  OP_VSPLTISW1,
9349  OP_VSPLTISW2,
9350  OP_VSPLTISW3,
9351  OP_VSLDOI4,
9352  OP_VSLDOI8,
9353  OP_VSLDOI12
9354  };
9355 
9356  if (OpNum == OP_COPY) {
9357  if (LHSID == (1*9+2)*9+3) return LHS;
9358  assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9359  return RHS;
9360  }
9361 
9362  SDValue OpLHS, OpRHS;
9363  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9364  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9365 
9366  int ShufIdxs[16];
9367  switch (OpNum) {
9368  default: llvm_unreachable("Unknown i32 permute!");
9369  case OP_VMRGHW:
9370  ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
9371  ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
9372  ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
9373  ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
9374  break;
9375  case OP_VMRGLW:
9376  ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
9377  ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
9378  ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
9379  ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
9380  break;
9381  case OP_VSPLTISW0:
9382  for (unsigned i = 0; i != 16; ++i)
9383  ShufIdxs[i] = (i&3)+0;
9384  break;
9385  case OP_VSPLTISW1:
9386  for (unsigned i = 0; i != 16; ++i)
9387  ShufIdxs[i] = (i&3)+4;
9388  break;
9389  case OP_VSPLTISW2:
9390  for (unsigned i = 0; i != 16; ++i)
9391  ShufIdxs[i] = (i&3)+8;
9392  break;
9393  case OP_VSPLTISW3:
9394  for (unsigned i = 0; i != 16; ++i)
9395  ShufIdxs[i] = (i&3)+12;
9396  break;
9397  case OP_VSLDOI4:
9398  return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
9399  case OP_VSLDOI8:
9400  return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
9401  case OP_VSLDOI12:
9402  return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
9403  }
9404  EVT VT = OpLHS.getValueType();
9405  OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
9406  OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
9407  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
9408  return DAG.getNode(ISD::BITCAST, dl, VT, T);
9409 }
9410 
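// How the 13-bit operand IDs above are packed (a sketch; the decoder below is
// hypothetical): each ID holds the four result lanes as base-9 digits, so the
// OP_COPY checks compare against (1*9+2)*9+3 = 102 for <0,1,2,3> (taken from
// LHS) and ((4*9+5)*9+6)*9+7 = 3382 for <4,5,6,7> (taken from RHS).
static void decodePerfectShuffleID(unsigned ID, int Lanes[4]) {
  for (int i = 3; i >= 0; --i) {
    Lanes[i] = ID % 9; // one base-9 digit per result lane
    ID /= 9;
  }
}
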
9411 /// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
9412 /// by the VINSERTB instruction introduced in ISA 3.0, else just return default
9413 /// SDValue.
9414 SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
9415  SelectionDAG &DAG) const {
9416  const unsigned BytesInVector = 16;
9417  bool IsLE = Subtarget.isLittleEndian();
9418  SDLoc dl(N);
9419  SDValue V1 = N->getOperand(0);
9420  SDValue V2 = N->getOperand(1);
9421  unsigned ShiftElts = 0, InsertAtByte = 0;
9422  bool Swap = false;
9423 
9424  // Shifts required to get the byte we want at element 7.
9425  unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
9426  0, 15, 14, 13, 12, 11, 10, 9};
9427  unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
9428  1, 2, 3, 4, 5, 6, 7, 8};
9429 
9430  ArrayRef<int> Mask = N->getMask();
9431  int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
9432 
9433  // For each mask element, find out if we're just inserting something
9434  // from V2 into V1 or vice versa.
9435  // Possible permutations inserting an element from V2 into V1:
9436  // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9437  // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9438  // ...
9439  // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
9440  // Inserting from V1 into V2 will be similar, except mask range will be
9441  // [16,31].
9442 
9443  bool FoundCandidate = false;
9444  // If both vector operands for the shuffle are the same vector, the mask
9445  // will contain only elements from the first one and the second one will be
9446  // undef.
9447  unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
9448  // Go through the mask of half-words to find an element that's being moved
9449  // from one vector to the other.
9450  for (unsigned i = 0; i < BytesInVector; ++i) {
9451  unsigned CurrentElement = Mask[i];
9452  // If 2nd operand is undefined, we should only look for element 7 in the
9453  // Mask.
9454  if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
9455  continue;
9456 
9457  bool OtherElementsInOrder = true;
9458  // Examine the other elements in the Mask to see if they're in original
9459  // order.
9460  for (unsigned j = 0; j < BytesInVector; ++j) {
9461  if (j == i)
9462  continue;
9463  // If CurrentElement is from V1 [0,15], then we expect the rest of the Mask
9464  // to be from V2 [16,31] and vice versa. Unless the 2nd operand is undefined,
9465  // in which case we assume we're always picking from the 1st operand.
9466  int MaskOffset =
9467  (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
9468  if (Mask[j] != OriginalOrder[j] + MaskOffset) {
9469  OtherElementsInOrder = false;
9470  break;
9471  }
9472  }
9473  // If other elements are in original order, we record the number of shifts
9474  // we need to get the element we want into element 7. Also record which byte
9475  // in the vector we should insert into.
9476  if (OtherElementsInOrder) {
9477  // If 2nd operand is undefined, we assume no shifts and no swapping.
9478  if (V2.isUndef()) {
9479  ShiftElts = 0;
9480  Swap = false;
9481  } else {
9482  // We only need the last 4 bits for the shift count because the operands will be swapped if CurrentElement is >= 2^4.
9483  ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
9484  : BigEndianShifts[CurrentElement & 0xF];
9485  Swap = CurrentElement < BytesInVector;
9486  }
9487  InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
9488  FoundCandidate = true;
9489  break;
9490  }
9491  }
9492 
9493  if (!FoundCandidate)
9494  return SDValue();
9495 
9496  // Candidate found, construct the proper SDAG sequence with VINSERTB,
9497  // optionally with VECSHL if shift is required.
9498  if (Swap)
9499  std::swap(V1, V2);
9500  if (V2.isUndef())
9501  V2 = V1;
9502  if (ShiftElts) {
9503  SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9504  DAG.getConstant(ShiftElts, dl, MVT::i32));
9505  return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
9506  DAG.getConstant(InsertAtByte, dl, MVT::i32));
9507  }
9508  return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
9509  DAG.getConstant(InsertAtByte, dl, MVT::i32));
9510 }
9511 
9512 /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
9513 /// by the VINSERTH instruction introduced in ISA 3.0, else just return default
9514 /// SDValue.
9515 SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
9516  SelectionDAG &DAG) const {
9517  const unsigned NumHalfWords = 8;
9518  const unsigned BytesInVector = NumHalfWords * 2;
9519  // Check that the shuffle is on half-words.
9520  if (!isNByteElemShuffleMask(N, 2, 1))
9521  return SDValue();
9522 
9523  bool IsLE = Subtarget.isLittleEndian();
9524  SDLoc dl(N);
9525  SDValue V1 = N->getOperand(0);
9526  SDValue V2 = N->getOperand(1);
9527  unsigned ShiftElts = 0, InsertAtByte = 0;
9528  bool Swap = false;
9529 
9530  // Shifts required to get the half-word we want at element 3.
9531  unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
9532  unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
9533 
9534  uint32_t Mask = 0;
9535  uint32_t OriginalOrderLow = 0x1234567;
9536  uint32_t OriginalOrderHigh = 0x89ABCDEF;
9537  // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
9538  // 32-bit space, needing only a 4-bit nibble per element.
9539  for (unsigned i = 0; i < NumHalfWords; ++i) {
9540  unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9541  Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
9542  }
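// For example, the half-word mask <8, 1, 2, 3, 4, 5, 6, 7> (element 0 taken
// from V2, the rest in original order) packs to Mask == 0x81234567.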
9543 
9544  // For each mask element, find out if we're just inserting something
9545  // from V2 into V1 or vice versa. Possible permutations inserting an element
9546  // from V2 into V1:
9547  // X, 1, 2, 3, 4, 5, 6, 7
9548  // 0, X, 2, 3, 4, 5, 6, 7
9549  // 0, 1, X, 3, 4, 5, 6, 7
9550  // 0, 1, 2, X, 4, 5, 6, 7
9551  // 0, 1, 2, 3, X, 5, 6, 7
9552  // 0, 1, 2, 3, 4, X, 6, 7
9553  // 0, 1, 2, 3, 4, 5, X, 7
9554  // 0, 1, 2, 3, 4, 5, 6, X
9555  // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
9556 
9557  bool FoundCandidate = false;
9558  // Go through the mask of half-words to find an element that's being moved
9559  // from one vector to the other.
9560  for (unsigned i = 0; i < NumHalfWords; ++i) {
9561  unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9562  uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
9563  uint32_t MaskOtherElts = ~(0xF << MaskShift);
9564  uint32_t TargetOrder = 0x0;
9565 
9566  // If both vector operands for the shuffle are the same vector, the mask
9567  // will contain only elements from the first one and the second one will be
9568  // undef.
9569  if (V2.isUndef()) {
9570  ShiftElts = 0;
9571  unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
9572  TargetOrder = OriginalOrderLow;
9573  Swap = false;
9574  // Skip if this is not the correct element or the mask of the other
9575  // elements doesn't match our expected order.
9576  if (MaskOneElt == VINSERTHSrcElem &&
9577  (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9578  InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9579  FoundCandidate = true;
9580  break;
9581  }
9582  } else { // If both operands are defined.
9583  // Target order is [8,15] if the current mask is between [0,7].
9584  TargetOrder =
9585  (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
9586  // Skip if the mask of the other elements doesn't match our expected order.
9587  if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9588  // We only need the last 3 bits for the number of shifts.
9589  ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
9590  : BigEndianShifts[MaskOneElt & 0x7];
9591  InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9592  Swap = MaskOneElt < NumHalfWords;
9593  FoundCandidate = true;
9594  break;
9595  }
9596  }
9597  }
9598 
9599  if (!FoundCandidate)
9600  return SDValue();
9601 
9602  // Candidate found, construct the proper SDAG sequence with VINSERTH,
9603  // optionally with VECSHL if shift is required.
9604  if (Swap)
9605  std::swap(V1, V2);
9606  if (V2.isUndef())
9607  V2 = V1;
9608  SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
9609  if (ShiftElts) {
9610  // Double ShiftElts because we're left shifting on v16i8 type.
9611  SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9612  DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
9613  SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
9614  SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
9615  DAG.getConstant(InsertAtByte, dl, MVT::i32));
9616  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9617  }
9618  SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
9619  SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
9620  DAG.getConstant(InsertAtByte, dl, MVT::i32));
9621  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9622 }
9623 
9624 /// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
9625 /// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
9626 /// return the default SDValue.
9627 SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
9628  SelectionDAG &DAG) const {
9629  // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
9630  // to v16i8. Peek through the bitcasts to get the actual operands.
9631  SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
9632  SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
9633 
9634  auto ShuffleMask = SVN->getMask();
9635  SDValue VecShuffle(SVN, 0);
9636  SDLoc DL(SVN);
9637 
9638  // Check that we have a four byte shuffle.
9639  if (!isNByteElemShuffleMask(SVN, 4, 1))
9640  return SDValue();
9641 
9642  // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
9643  if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
9644  std::swap(LHS, RHS);
9645  VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
9646  ShuffleMask = cast<ShuffleVectorSDNode>(VecShuffle)->getMask();
9647  }
9648 
9649  // Ensure that the RHS is a vector of constants.
9650  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
9651  if (!BVN)
9652  return SDValue();
9653 
9654  // Check if RHS is a splat of 4-bytes (or smaller).
9655  APInt APSplatValue, APSplatUndef;
9656  unsigned SplatBitSize;
9657  bool HasAnyUndefs;
9658  if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
9659  HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
9660  SplatBitSize > 32)
9661  return SDValue();
9662 
9663  // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
9664  // The instruction splats a constant C into two words of the source vector
9665  // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
9666  // Thus we check that the shuffle mask is the equivalent of
9667  // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
9668  // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
9669  // within each word are consecutive, so we only need to check the first byte.
9670  SDValue Index;
9671  bool IsLE = Subtarget.isLittleEndian();
9672  if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
9673  (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
9674  ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
9675  Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
9676  else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
9677  (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
9678  ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
9679  Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
9680  else
9681  return SDValue();
9682 
9683  // If the splat is narrower than 32-bits, we need to get the 32-bit value
9684  // for XXSPLTI32DX.
9685  unsigned SplatVal = APSplatValue.getZExtValue();
9686  for (; SplatBitSize < 32; SplatBitSize <<= 1)
9687  SplatVal |= (SplatVal << SplatBitSize);
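// For example, an 8-bit splat of 0xAB widens to 0xABAB and then to 0xABABABAB
// before being used as the 32-bit immediate.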
9688 
9689  SDValue SplatNode = DAG.getNode(
9690  PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
9691  Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
9692  return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
9693 }
9694 
9695 /// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
9696 /// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
9697 /// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
9698 /// i.e (or (shl x, C1), (srl x, 128-C1)).
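/// For example, a rotate amount of 16 bits (SHLAmt == 16) yields the v16i8
/// shuffle mask <2, 3, ..., 15, 0, 1> from the std::rotate below.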
9699 SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
9700  assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
9701  assert(Op.getValueType() == MVT::v1i128 &&
9702  "Only set v1i128 as custom, other type shouldn't reach here!");
9703  SDLoc dl(Op);
9704  SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
9705  SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
9706  unsigned SHLAmt = N1.getConstantOperandVal(0);
9707  if (SHLAmt % 8 == 0) {
9708  SmallVector<int, 16> Mask(16, 0);
9709  std::iota(Mask.begin(), Mask.end(), 0);
9710  std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
9711  if (SDValue Shuffle =
9712  DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
9713  DAG.getUNDEF(MVT::v16i8), Mask))
9714  return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
9715  }
9716  SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
9717  SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
9718  DAG.getConstant(SHLAmt, dl, MVT::i32));
9719  SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
9720  DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
9721  SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
9722  return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
9723 }
9724 
9725 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
9726 /// is a shuffle we can handle in a single instruction, return it. Otherwise,
9727 /// return the code it can be lowered into. Worst case, it can always be
9728 /// lowered into a vperm.
9729 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
9730  SelectionDAG &DAG) const {
9731  SDLoc dl(Op);
9732  SDValue V1 = Op.getOperand(0);
9733  SDValue V2 = Op.getOperand(1);
9734  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
9735 
9736  // Any nodes that were combined in the target-independent combiner prior
9737  // to vector legalization will not be sent to the target combine. Try to
9738  // combine it here.
9739  if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
9740  if (!isa<ShuffleVectorSDNode>(NewShuffle))
9741  return NewShuffle;
9742  Op = NewShuffle;
9743  SVOp = cast<ShuffleVectorSDNode>(Op);
9744  V1 = Op.getOperand(0);
9745  V2 = Op.getOperand(1);
9746  }
9747  EVT VT = Op.getValueType();
9748  bool isLittleEndian = Subtarget.isLittleEndian();
9749 
9750  unsigned ShiftElts, InsertAtByte;
9751  bool Swap = false;
9752 
9753  // If this is a load-and-splat, we can do that with a single instruction
9754  // in some cases. However if the load has multiple uses, we don't want to
9755  // combine it because that will just produce multiple loads.
9756  bool IsPermutedLoad = false;
9757  const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
9758  if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
9759  (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
9760  InputLoad->hasOneUse()) {
9761  bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
9762  int SplatIdx =
9763  PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
9764 
9765  // The splat index for permuted loads will be in the left half of the vector
9766  // which is strictly wider than the loaded value by 8 bytes. So we need to
9767  // adjust the splat index to point to the correct address in memory.
9768  if (IsPermutedLoad) {
9769  assert((isLittleEndian || IsFourByte) &&
9770  "Unexpected size for permuted load on big endian target");
9771  SplatIdx += IsFourByte ? 2 : 1;
9772  assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
9773  "Splat of a value outside of the loaded memory");
9774  }
9775 
9776  LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9777  // For 4-byte load-and-splat, we need Power9.
9778  if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
9779  uint64_t Offset = 0;
9780  if (IsFourByte)
9781  Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
9782  else
9783  Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
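// For example, splatting word 1 of a full 16-byte load reads 4 bytes at
// offset 8 on little endian ((3 - 1) * 4) and at offset 4 on big endian.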
9784 
9785  // If the width of the load is the same as the width of the splat,
9786  // loading with an offset would load the wrong memory.
9787  if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
9788  Offset = 0;
9789 
9790  SDValue BasePtr = LD->getBasePtr();
9791  if (Offset != 0)
9792  BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
9793  BasePtr, DAG.getIntPtrConstant(Offset, dl));
9794  SDValue Ops[] = {
9795  LD->getChain(), // Chain
9796  BasePtr, // BasePtr
9797  DAG.getValueType(Op.getValueType()) // VT
9798  };
9799  SDVTList VTL =
9800  DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
9801  SDValue LdSplt =
9802  DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
9803  Ops, LD->getMemoryVT(), LD->getMemOperand());
9804  DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
9805  if (LdSplt.getValueType() != SVOp->getValueType(0))
9806  LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
9807  return LdSplt;
9808  }
9809  }
9810  if (Subtarget.hasP9Vector() &&
9811  PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
9812  isLittleEndian)) {
9813  if (Swap)
9814  std::swap(V1, V2);
9815  SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9816  SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
9817  if (ShiftElts) {
9818  SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
9819  DAG.getConstant(ShiftElts, dl, MVT::i32));
9820  SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
9821  DAG.getConstant(InsertAtByte, dl, MVT::i32));
9822  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9823  }
9824  SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
9825  DAG.getConstant(InsertAtByte, dl, MVT::i32));
9826  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9827  }
9828 
9829  if (Subtarget.hasPrefixInstrs()) {
9830  SDValue SplatInsertNode;
9831  if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
9832  return SplatInsertNode;
9833  }
9834 
9835  if (Subtarget.hasP9Altivec()) {
9836  SDValue NewISDNode;
9837  if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
9838  return NewISDNode;
9839 
9840  if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
9841  return NewISDNode;
9842  }
9843 
9844  if (Subtarget.hasVSX() &&
9845  PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
9846  if (Swap)
9847  std::swap(V1, V2);
9848  SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9849  SDValue Conv2 =
9850  DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
9851 
9852  SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
9853  DAG.getConstant(ShiftElts, dl, MVT::i32));
9854  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
9855  }
9856 
9857  if (Subtarget.hasVSX() &&
9858  PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
9859  if (Swap)
9860  std::swap(V1, V2);
9861  SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
9862  SDValue Conv2 =
9863  DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
9864 
9865  SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
9866  DAG.getConstant(ShiftElts, dl, MVT::i32));
9867  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
9868  }
9869 
9870  if (Subtarget.hasP9Vector()) {
9871  if (PPC::isXXBRHShuffleMask(SVOp)) {
9872  SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
9873  SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
9874  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
9875  } else if (PPC::isXXBRWShuffleMask(SVOp)) {
9876  SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9877  SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
9878  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
9879  } else if (PPC::isXXBRDShuffleMask(SVOp)) {
9880  SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
9881  SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
9882  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
9883  } else if (PPC::isXXBRQShuffleMask(SVOp)) {
9884  SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
9885  SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
9886  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
9887  }
9888  }
9889 
9890  if (Subtarget.hasVSX()) {
9891  if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
9892  int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
9893 
9894  SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9895  SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
9896  DAG.getConstant(SplatIdx, dl, MVT::i32));
9897  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
9898  }
9899 
9900  // Left shifts of 8 bytes are actually swaps. Convert accordingly.
9901  if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
9902  SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
9903  SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
9904  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
9905  }
9906  }
9907 
9908  // Cases that are handled by instructions that take permute immediates
9909  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
9910  // selected by the instruction selector.
9911  if (V2.isUndef()) {
9912  if (PPC::isSplatShuffleMask(SVOp, 1) ||
9913  PPC::isSplatShuffleMask(SVOp, 2) ||
9914  PPC::isSplatShuffleMask(SVOp, 4) ||
9915  PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
9916  PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
9917  PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
9918  PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
9919  PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
9920  PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
9921  PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
9922  PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
9923  PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
9924  (Subtarget.hasP8Altivec() && (
9925  PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
9926  PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
9927  PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
9928  return Op;
9929  }
9930  }
9931 
9932  // Altivec has a variety of "shuffle immediates" that take two vector inputs
9933  // and produce a fixed permutation. If any of these match, do not lower to
9934  // VPERM.
9935  unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
9936  if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
9937  PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
9938  PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
9939  PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
9940  PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
9941  PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
9942  PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
9943  PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
9944  PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
9945  (Subtarget.hasP8Altivec() && (
9946  PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
9947  PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
9948  PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
9949  return Op;
9950 
9951  // Check to see if this is a shuffle of 4-byte values. If so, we can use our
9952  // perfect shuffle table to emit an optimal matching sequence.
9953  ArrayRef<int> PermMask = SVOp->getMask();
9954 
9955  unsigned PFIndexes[4];
9956  bool isFourElementShuffle = true;
9957  for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
9958  unsigned EltNo = 8; // Start out undef.
9959  for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
9960  if (PermMask[i*4+j] < 0)
9961  continue; // Undef, ignore it.
9962 
9963  unsigned ByteSource = PermMask[i*4+j];
9964  if ((ByteSource & 3) != j) {
9965  isFourElementShuffle = false;
9966  break;
9967  }
9968 
9969  if (EltNo == 8) {
9970  EltNo = ByteSource/4;
9971  } else if (EltNo != ByteSource/4) {
9972  isFourElementShuffle = false;
9973  break;
9974  }
9975  }
9976  PFIndexes[i] = EltNo;
9977  }
9978 
9979  // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
9980  // perfect shuffle vector to determine if it is cost effective to do this as
9981  // discrete instructions, or whether we should use a vperm.
9982  // For now, we skip this for little endian until such time as we have a
9983  // little-endian perfect shuffle table.
9984  if (isFourElementShuffle && !isLittleEndian) {
9985  // Compute the index in the perfect shuffle table.
9986  unsigned PFTableIndex =
9987  PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
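// Each index is a base-9 digit (source element 0-7, or 8 for undef); for
// example, PFIndexes <0, 1, 2, 3> (the identity shuffle) selects entry
// 0*729 + 1*81 + 2*9 + 3 = 102.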
9988 
9989  unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
9990  unsigned Cost = (PFEntry >> 30);
9991 
9992  // Determining when to avoid vperm is tricky. Many things affect the cost
9993  // of vperm, particularly how many times the perm mask needs to be computed.
9994  // For example, if the perm mask can be hoisted out of a loop or is already
9995  // used (perhaps because there are multiple permutes with the same shuffle
9996  // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of
9997  // the loop requires an extra register.
9998  //
9999  // As a compromise, we only emit discrete instructions if the shuffle can be
10000  // generated in 3 or fewer operations. When we have loop information
10001  // available, if this block is within a loop, we should avoid using vperm
10002  // for 3-operation perms and use a constant pool load instead.
10003  if (Cost < 3)
10004  return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10005  }
10006 
10007  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10008  // vector that will get spilled to the constant pool.
10009  if (V2.isUndef()) V2 = V1;
10010 
10011  // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10012  // that it is in input element units, not in bytes. Convert now.
10013 
10014  // For little endian, the order of the input vectors is reversed, and
10015  // the permutation mask is complemented with respect to 31. This is
10016  // necessary to produce proper semantics with the big-endian-biased vperm
10017  // instruction.
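// For example, source byte 0 of the first input maps to permute-control
// byte 31 on little endian (and to control byte 0 on big endian); the operand
// order is also swapped to (V2, V1) below so the vperm semantics come out right.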
10018  EVT EltVT = V1.getValueType().getVectorElementType();
10019  unsigned BytesPerElement = EltVT.getSizeInBits()/8;
10020 
10021  SmallVector<SDValue, 16> ResultMask;
10022  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10023  unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10024 
10025  for (unsigned j = 0; j != BytesPerElement; ++j)
10026  if (isLittleEndian)
10027  ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
10028  dl, MVT::i32));
10029  else
10030  ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
10031  MVT::i32));
10032  }
10033 
10034  ShufflesHandledWithVPERM++;
10035  SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10036  LLVM_DEBUG(dbgs() << "Emitting a VPERM for the following shuffle:\n");
10037  LLVM_DEBUG(SVOp->dump());
10038  LLVM_DEBUG(dbgs() << "With the following permute control vector:\n");
10039  LLVM_DEBUG(VPermMask.dump());
10040 
10041  if (isLittleEndian)
10042  return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
10043  V2, V1, VPermMask);
10044  else
10045  return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
10046  V1, V2, VPermMask);
10047 }
10048 
10049 /// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10050 /// vector comparison. If it is, return true and fill in CompareOpc/isDot with
10051 /// information about the intrinsic.
10052 static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10053  bool &isDot, const PPCSubtarget &Subtarget) {
10054  unsigned IntrinsicID =
10055  cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
10056  CompareOpc = -1;
10057  isDot = false;
10058  switch (IntrinsicID) {
10059  default:
10060  return false;
10061  // Comparison predicates.
10062  case Intrinsic::ppc_altivec_vcmpbfp_p:
10063  CompareOpc = 966;
10064  isDot = true;
10065  break;
10066  case Intrinsic::ppc_altivec_vcmpeqfp_p:
10067  CompareOpc = 198;
10068  isDot = true;
10069  break;
10070  case Intrinsic::ppc_altivec_vcmpequb_p:
10071  CompareOpc = 6;
10072  isDot = true;
10073  break;
10074  case Intrinsic::ppc_altivec_vcmpequh_p:
10075  CompareOpc = 70;
10076  isDot = true;
10077  break;
10078  case Intrinsic::ppc_altivec_vcmpequw_p:
10079  CompareOpc = 134;
10080  isDot = true;
10081  break;
10082  case Intrinsic::ppc_altivec_vcmpequd_p:
10083  if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10084  CompareOpc = 199;
10085  isDot = true;
10086  } else
10087  return false;
10088  break;
10089  case Intrinsic::ppc_altivec_vcmpneb_p:
10090  case Intrinsic::ppc_altivec_vcmpneh_p:
10091  case Intrinsic::ppc_altivec_vcmpnew_p:
10092  case Intrinsic::ppc_altivec_vcmpnezb_p:
10093  case Intrinsic::ppc_altivec_vcmpnezh_p:
10094  case Intrinsic::ppc_altivec_vcmpnezw_p:
10095  if (Subtarget.hasP9Altivec()) {
10096  switch (IntrinsicID) {
10097  default:
10098  llvm_unreachable("Unknown comparison intrinsic.");
10099  case Intrinsic::ppc_altivec_vcmpneb_p:
10100  CompareOpc = 7;
10101  break;
10102  case Intrinsic::ppc_altivec_vcmpneh_p:
10103  CompareOpc = 71;
10104  break;
10105  case Intrinsic::ppc_altivec_vcmpnew_p:
10106  CompareOpc = 135;
10107  break;
10108  case Intrinsic::ppc_altivec_vcmpnezb_p:
10109  CompareOpc = 263;
10110  break;
10111  case Intrinsic::ppc_altivec_vcmpnezh_p:
10112  CompareOpc = 327;
10113  break;
10114  case Intrinsic::ppc_altivec_vcmpnezw_p:
10115  CompareOpc = 391;
10116  break;
10117  }
10118  isDot = true;
10119  } else
10120  return false;
10121  break;
10122  case Intrinsic::ppc_altivec_vcmpgefp_p:
10123  CompareOpc = 454;
10124  isDot = true;
10125  break;
10126  case Intrinsic::ppc_altivec_vcmpgtfp_p:
10127  CompareOpc = 710;
10128  isDot = true;
10129  break;
10130  case Intrinsic::ppc_altivec_vcmpgtsb_p:
10131  CompareOpc = 774;
10132  isDot = true;
10133  break;
10134  case Intrinsic::ppc_altivec_vcmpgtsh_p:
10135  CompareOpc = 838;
10136  isDot = true;
10137  break;
10138  case Intrinsic::ppc_altivec_vcmpgtsw_p:
10139  CompareOpc = 902;
10140  isDot = true;
10141  break;
10142  case Intrinsic::ppc_altivec_vcmpgtsd_p:
10143  if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10144  CompareOpc = 967;
10145  isDot = true;
10146  } else
10147  return false;
10148  break;
10149  case Intrinsic::ppc_altivec_vcmpgtub_p:
10150  CompareOpc = 518;
10151  isDot = true;
10152  break;
10153  case Intrinsic::ppc_altivec_vcmpgtuh_p:
10154  CompareOpc = 582;
10155  isDot = true;
10156  break;
10157  case Intrinsic::ppc_altivec_vcmpgtuw_p:
10158  CompareOpc = 646;
10159  isDot = true;
10160  break;
10161  case Intrinsic::ppc_altivec_vcmpgtud_p:
10162  if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10163  CompareOpc = 711;
10164  isDot = true;
10165  } else
10166  return false;
10167  break;
10168 
10169  case Intrinsic::ppc_altivec_vcmpequq:
10170  case Intrinsic::ppc_altivec_vcmpgtsq:
10171  case Intrinsic::ppc_altivec_vcmpgtuq:
10172  if (!Subtarget.isISA3_1())
10173  return false;
10174  switch (IntrinsicID) {
10175  default:
10176  llvm_unreachable("Unknown comparison intrinsic.");
10177  case Intrinsic::ppc_altivec_vcmpequq:
10178  CompareOpc = 455;
10179  break;
10180  case Intrinsic::ppc_altivec_vcmpgtsq:
10181  CompareOpc = 903;
10182  break;
10183  case Intrinsic::ppc_altivec_vcmpgtuq:
10184  CompareOpc = 647;
10185  break;
10186  }
10187  break;
10188 
10189  // VSX predicate comparisons use the same infrastructure
10190  case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10191  case Intrinsic::ppc_vsx_xvcmpgedp_p:
10192  case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10193  case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10194  case Intrinsic::ppc_vsx_xvcmpgesp_p:
10195  case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10196  if (Subtarget.hasVSX()) {
10197  switch (IntrinsicID) {
10198  case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10199  CompareOpc = 99;
10200  break;
10201  case Intrinsic::ppc_vsx_xvcmpgedp_p:
10202  CompareOpc = 115;
10203  break;
10204  case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10205  CompareOpc = 107;
10206  break;
10207  case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10208  CompareOpc = 67;
10209  break;
10210  case Intrinsic::ppc_vsx_xvcmpgesp_p:
10211  CompareOpc = 83;
10212  break;
10213  case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10214  CompareOpc = 75;
10215  break;
10216  }
10217  isDot = true;
10218  } else
10219  return false;
10220  break;
10221 
10222  // Normal Comparisons.
10223  case Intrinsic::ppc_altivec_vcmpbfp:
10224  CompareOpc = 966;
10225  break;
10226  case Intrinsic::ppc_altivec_vcmpeqfp:
10227  CompareOpc = 198;
10228  break;
10229  case Intrinsic::ppc_altivec_vcmpequb:
10230  CompareOpc = 6;
10231  break;
10232  case Intrinsic::ppc_altivec_vcmpequh:
10233  CompareOpc = 70;
10234  break;
10235  case Intrinsic::ppc_altivec_vcmpequw:
10236  CompareOpc = 134;
10237  break;
10238  case Intrinsic::ppc_altivec_vcmpequd:
10239  if (Subtarget.hasP8Altivec())
10240  CompareOpc = 199;
10241  else
10242  return false;
10243  break;
10244  case Intrinsic::ppc_altivec_vcmpneb:
10245  case Intrinsic::ppc_altivec_vcmpneh:
10246  case Intrinsic::ppc_altivec_vcmpnew:
10247  case Intrinsic::ppc_altivec_vcmpnezb:
10248  case Intrinsic::ppc_altivec_vcmpnezh:
10249  case Intrinsic::ppc_altivec_vcmpnezw:
10250  if (Subtarget.hasP9Altivec())
10251  switch (IntrinsicID) {
10252  default:
10253  llvm_unreachable("Unknown comparison intrinsic.");
10254  case Intrinsic::ppc_altivec_vcmpneb:
10255  CompareOpc = 7;
10256  break;
10257  case Intrinsic::ppc_altivec_vcmpneh:
10258  CompareOpc = 71;
10259  break;
10260  case Intrinsic::ppc_altivec_vcmpnew:
10261  CompareOpc = 135;
10262  break;
10263  case Intrinsic::ppc_altivec_vcmpnezb:
10264  CompareOpc = 263;
10265  break;
10266  case Intrinsic::ppc_altivec_vcmpnezh:
10267  CompareOpc = 327;
10268  break;
10269  case Intrinsic::ppc_altivec_vcmpnezw:
10270  CompareOpc = 391;
10271  break;
10272  }
10273  else
10274  return false;
10275  break;
10276  case Intrinsic::ppc_altivec_vcmpgefp:
10277  CompareOpc = 454;
10278  break;
10279  case Intrinsic::ppc_altivec_vcmpgtfp:
10280  CompareOpc = 710;
10281  break;
10282  case Intrinsic::ppc_altivec_vcmpgtsb:
10283  CompareOpc = 774;
10284  break;
10285  case Intrinsic::ppc_altivec_vcmpgtsh:
10286  CompareOpc = 838;
10287  break;
10288  case Intrinsic::ppc_altivec_vcmpgtsw:
10289  CompareOpc = 902;
10290  break;
10291  case Intrinsic::ppc_altivec_vcmpgtsd:
10292  if (Subtarget.hasP8Altivec())
10293  CompareOpc = 967;
10294  else
10295  return false;
10296  break;
10297  case Intrinsic::ppc_altivec_vcmpgtub:
10298  CompareOpc = 518;
10299  break;
10300  case Intrinsic::ppc_altivec_vcmpgtuh:
10301  CompareOpc = 582;
10302  break;
10303  case Intrinsic::ppc_altivec_vcmpgtuw:
10304  CompareOpc = 646;
10305  break;
10306  case Intrinsic::ppc_altivec_vcmpgtud:
10307  if (Subtarget.hasP8Altivec())
10308  CompareOpc = 711;
10309  else
10310  return false;
10311  break;
10312  case Intrinsic::ppc_altivec_vcmpequq_p:
10313  case Intrinsic::ppc_altivec_vcmpgtsq_p:
10314  case Intrinsic::ppc_altivec_vcmpgtuq_p:
10315  if (!Subtarget.isISA3_1())
10316  return false;
10317  switch (IntrinsicID) {
10318  default:
10319  llvm_unreachable("Unknown comparison intrinsic.");
10320  case Intrinsic::ppc_altivec_vcmpequq_p:
10321  CompareOpc = 455;
10322  break;
10323  case Intrinsic::ppc_altivec_vcmpgtsq_p:
10324  CompareOpc = 903;
10325  break;
10326  case Intrinsic::ppc_altivec_vcmpgtuq_p:
10327  CompareOpc = 647;
10328  break;
10329  }
10330  isDot = true;
10331  break;
10332  }
10333  return true;
10334 }
10335 
10336 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
10337 /// lower, do it, otherwise return null.
10338 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10339  SelectionDAG &DAG) const {
10340  unsigned IntrinsicID =
10341  cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
10342 
10343  SDLoc dl(Op);
10344 
10345  switch (IntrinsicID) {
10346  case Intrinsic::thread_pointer:
10347  // Reads the thread pointer register, used for __builtin_thread_pointer.
10348  if (Subtarget.isPPC64())
10349  return DAG.getRegister(PPC::X13, MVT::i64);
10350  return DAG.getRegister(PPC::R2, MVT::i32);
10351 
10352  case Intrinsic::ppc_mma_disassemble_acc:
10353  case Intrinsic::ppc_vsx_disassemble_pair: {
10354  int NumVecs = 2;
10355  SDValue WideVec = Op.getOperand(1);
10356  if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
10357  NumVecs = 4;
10358  WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
10359  }
10360  SmallVector<SDValue, 4> RetOps;
10361  for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
10362  SDValue Extract = DAG.getNode(
10363  PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
10364  DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
10365  : VecNo,
10366  dl, getPointerTy(DAG.getDataLayout())));
10367  RetOps.push_back(Extract);
10368  }
10369  return DAG.getMergeValues(RetOps, dl);
10370  }
10371  }
10372 
10373  // If this is a lowered altivec predicate compare, CompareOpc is set to the
10374  // opcode number of the comparison.
10375  int CompareOpc;
10376  bool isDot;
10377  if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
10378  return SDValue(); // Don't custom lower most intrinsics.
10379 
10380  // If this is a non-dot comparison, make the VCMP node and we are done.
10381  if (!isDot) {
10382  SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
10383  Op.getOperand(1), Op.getOperand(2),
10384  DAG.getConstant(CompareOpc, dl, MVT::i32));
10385  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
10386  }
10387 
10388  // Create the PPCISD altivec 'dot' comparison node.
10389  SDValue Ops[] = {
10390  Op.getOperand(2), // LHS
10391  Op.getOperand(3), // RHS
10392  DAG.getConstant(CompareOpc, dl, MVT::i32)
10393  };
10394  EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
10395  SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
10396 
10397  // Now that we have the comparison, emit a copy from the CR to a GPR.
10398  // This is flagged to the above dot comparison.
10399  SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
10400  DAG.getRegister(PPC::CR6, MVT::i32),
10401  CompNode.getValue(1));
10402 
10403  // Unpack the result based on how the target uses it.
10404  unsigned BitNo; // Bit # of CR6.
10405  bool InvertBit; // Invert result?
10406  switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
10407  default: // Can't happen, don't crash on invalid number though.
10408  case 0: // Return the value of the EQ bit of CR6.
10409  BitNo = 0; InvertBit = false;
10410  break;
10411  case 1: // Return the inverted value of the EQ bit of CR6.
10412  BitNo = 0; InvertBit = true;
10413  break;
10414  case 2: // Return the value of the LT bit of CR6.
10415  BitNo = 2; InvertBit = false;
10416  break;
10417  case 3: // Return the inverted value of the LT bit of CR6.
10418  BitNo = 2; InvertBit = true;
10419  break;
10420  }
10421 
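// The four CR6 bits end up at bits 7..4 of the MFOCRF result (LT at 7, GT at
// 6, EQ at 5, SO at 4), so 8 - (3 - BitNo) yields a shift of 5 for the EQ bit
// (BitNo 0) and 7 for the LT bit (BitNo 2).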
10422  // Shift the bit into the low position.
10423  Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
10424  DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
10425  // Isolate the bit.
10426  Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
10427  DAG.getConstant(1, dl, MVT::i32));
10428 
10429  // If we are supposed to, toggle the bit.
10430  if (InvertBit)
10431  Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
10432  DAG.getConstant(1, dl, MVT::i32));
10433  return Flags;
10434 }
10435 
10436 SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
10437  SelectionDAG &DAG) const {
10438  // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
10439  // the beginning of the argument list.
10440  int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
10441  SDLoc DL(Op);
10442  switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
10443  case Intrinsic::ppc_cfence: {
10444  assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
10445  assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
10446  return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
10447  DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
10448  Op.getOperand(ArgStart + 1)),
10449  Op.getOperand(0)),
10450  0);
10451  }
10452  default:
10453  break;
10454  }
10455  return SDValue();
10456 }
10457 
10458 // Lower scalar BSWAP64 to xxbrd.
10459 SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
10460  SDLoc dl(Op);
10461  if (!Subtarget.isPPC64())
10462  return Op;
10463  // MTVSRDD
10464  Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
10465  Op.getOperand(0));
10466  // XXBRD
10467  Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
10468  // MFVSRD
10469  int VectorIndex = 0;
10470  if (Subtarget.isLittleEndian())
10471  VectorIndex = 1;
10472  Op = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::i64, Op,
10473  DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
10474  return Op;
10475 }
10476 
10477 // ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
10478 // compared to a value that is atomically loaded (atomic loads zero-extend).
10479 SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
10480  SelectionDAG &DAG) const {
10481  assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
10482  "Expecting an atomic compare-and-swap here.");
10483  SDLoc dl(Op);
10484  auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
10485  EVT MemVT = AtomicNode->getMemoryVT();
10486  if (MemVT.getSizeInBits() >= 32)
10487  return Op;
10488 
10489  SDValue CmpOp = Op.getOperand(2);
10490  // If this is already correctly zero-extended, leave it alone.
10491  auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
10492  if (DAG.MaskedValueIsZero(CmpOp, HighBits))
10493  return Op;
10494 
10495  // Clear the high bits of the compare operand.
10496  unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
10497  SDValue NewCmpOp =
10498  DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
10499  DAG.getConstant(MaskVal, dl, MVT::i32));
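// For example, an i8 compare-and-swap masks the compare operand with 0xFF and
// an i16 one with 0xFFFF, matching the zero-extension done by the atomic load.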
10500 
10501  // Replace the existing compare operand with the properly zero-extended one.
10502  SmallVector<SDValue, 4> Ops;
10503  for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
10504  Ops.push_back(AtomicNode->getOperand(i));
10505  Ops[2] = NewCmpOp;
10506  MachineMemOperand *MMO = AtomicNode->getMemOperand();
10507  SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
10508  auto NodeTy =
10509  (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
10510  return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
10511 }
10512 
10513 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
10514  SelectionDAG &DAG) const {
10515  SDLoc dl(Op);
10516  // Create a stack slot that is 16-byte aligned.
10517  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10518  int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
10519  EVT PtrVT = getPointerTy(DAG.getDataLayout());
10520  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
10521 
10522  // Store the input value into Value#0 of the stack slot.
10523  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
10524  MachinePointerInfo());
10525  // Load it out.
10526  return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
10527 }
10528 
10529 SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
10530  SelectionDAG &DAG) const {
10531  assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
10532  "Should only be called for ISD::INSERT_VECTOR_ELT");
10533 
10534  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
10535 
10536  EVT VT = Op.getValueType();
10537  SDLoc dl(Op);
10538  SDValue V1 = Op.getOperand(0);
10539  SDValue V2 = Op.getOperand(1);
10540  SDValue V3 = Op.getOperand(2);
10541 
10542  if (VT == MVT::v2f64 && C)
10543  return Op;
10544 
10545  if (Subtarget.isISA3_1()) {
10546  if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
10547  return SDValue();
10548  // On P10, we have legal lowering for constant and variable indices for
10549  // integer vectors.
10550  if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
10551  VT == MVT::v2i64)
10552  return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, V2, V3);
10553  // For f32 and f64 vectors, we have legal lowering for variable indices.
10554  // For f32 we also have legal lowering when the element is loaded from
10555  // memory.
10556  if (VT == MVT::v4f32 || VT == MVT::v2f64) {
10557  if (!C || (VT == MVT::v4f32 && dyn_cast<LoadSDNode>(V2)))
10558  return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, V2, V3);
10559  return Op;
10560  }
10561  }
10562 
10563  // Before P10, we have legal lowering for constant indices but not for
10564  // variable ones.
10565  if (!C)
10566  return SDValue();
10567 
10568  // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
10569  if (VT == MVT::v8i16 || VT == MVT::v16i8) {
10570  SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
10571  unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
10572  unsigned InsertAtElement = C->getZExtValue();
10573  unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
10574  if (Subtarget.isLittleEndian()) {
10575  InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
10576  }
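// For example, inserting into element 0 of a v8i16 on little endian targets
// byte 14, since (16 - 2) - 0 == 14.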
10577  return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
10578  DAG.getConstant(InsertAtByte, dl, MVT::i32));
10579  }
10580  return Op;
10581 }
10582 
10583 SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
10584  SelectionDAG &DAG) const {
10585  SDLoc dl(Op);
10586  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
10587  SDValue LoadChain = LN->getChain();
10588  SDValue BasePtr = LN->getBasePtr();
10589  EVT VT = Op.getValueType();
10590 
10591  if (VT != MVT::v256i1 && VT != MVT::v512i1)
10592  return Op;
10593 
10594  // Type v256i1 is used for pairs and v512i1 is used for accumulators.
10595  // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
10596  // 2 or 4 vsx registers.
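// For example, a v512i1 accumulator load becomes four consecutive v16i8 loads
// at offsets 0, 16, 32 and 48 from the base pointer.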
10597  assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
10598  "Type unsupported without MMA");
10599  assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
10600  "Type unsupported without paired vector support");
10601  Align Alignment = LN->getAlign();
10602  SmallVector<SDValue, 4> Loads;
10603  SmallVector<SDValue, 4> LoadChains;
10604  unsigned NumVecs = VT.getSizeInBits() / 128;
10605  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
10606  SDValue Load =
10607  DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
10608  LN->getPointerInfo().getWithOffset(Idx * 16),
10609  commonAlignment(Alignment, Idx * 16),
10610  LN->getMemOperand()->getFlags(), LN->getAAInfo());
10611  BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
10612  DAG.getConstant(16, dl, BasePtr.getValueType()));
10613  Loads.push_back(Load);
10614  LoadChains.push_back(Load.getValue(1));
10615  }
10616  if (Subtarget.isLittleEndian()) {
10617  std::reverse(Loads.begin(), Loads.end());
10618  std::reverse(LoadChains.begin(), LoadChains.end());
10619  }
10620  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
10621  SDValue Value =
10622  DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
10623  dl, VT, Loads);
10624  SDValue RetOps[] = {Value, TF};
10625  return DAG.getMergeValues(RetOps, dl);
10626 }
10627 
10628 SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
10629  SelectionDAG &DAG) const {
10630  SDLoc dl(Op);
10631  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
10632  SDValue StoreChain = SN->getChain();
10633  SDValue BasePtr = SN->getBasePtr();
10634  SDValue Value = SN->getValue();
10635  EVT StoreVT = Value.getValueType();
10636 
10637  if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
10638  return Op;
10639 
10640  // Type v256i1 is used for pairs and v512i1 is used for accumulators.
10641  // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
10642  // underlying registers individually.
10643  assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
10644  "Type unsupported without MMA");
10645  assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
10646  "Type unsupported without paired vector support");
10647  Align Alignment = SN->getAlign();
10648  SmallVector<SDValue, 4> Stores;
10649  unsigned NumVecs = 2;
10650  if (StoreVT == MVT::v512i1) {
10651  Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
10652  NumVecs = 4;
10653  }
10654  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
10655  unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
10656  SDValue Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
10657  DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
10658  SDValue Store =
10659  DAG.getStore(StoreChain, dl, Elt, BasePtr,
10660  SN->getPointerInfo().getWithOffset(Idx * 16),
10661  commonAlignment(Alignment, Idx * 16),
10662  SN->getMemOperand()->getFlags(), SN->getAAInfo());
10663  BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
10664  DAG.getConstant(16, dl, BasePtr.getValueType()));
10665  Stores.push_back(Store);
10666  }
10667  SDValue TF = DAG.getTokenFactor(dl, Stores);
10668  return TF;
10669 }
10670 
10671 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
10672  SDLoc dl(Op);
10673  if (Op.getValueType() == MVT::v4i32) {
10674  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
10675 
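// The v4i32 multiply is assembled from 16-bit halves using the identity
//   a * b (mod 2^32) = aLo*bLo + ((aHi*bLo + aLo*bHi) << 16)
// vmulouh supplies the aLo*bLo terms, and vmsumuhm applied to LHS and a
// half-word-rotated RHS supplies the cross terms, which are shifted up by 16
// and added in.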
10676  SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
10677  // +16 as shift amt.
10678  SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
10679  SDValue RHSSwap = // = vrlw RHS, 16
10680  BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
10681 
10682  // Shrinkify inputs to v8i16.
10683  LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
10684  RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
10685  RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
10686 
10687  // Low parts multiplied together, generating 32-bit results (we ignore the
10688  // top parts).
10689  SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
10690  LHS, RHS, DAG, dl, MVT::v4i32);
10691 
10692  SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
10693  LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
10694  // Shift the high parts up 16 bits.
10695  HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
10696  Neg16, DAG, dl);
10697  return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
10698  } else if (Op.getValueType() == MVT::v16i8) {
10699  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
10700  bool isLittleEndian = Subtarget.isLittleEndian();
10701 
10702  // Multiply the even 8-bit parts, producing 16-bit sums.
10703  SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
10704  LHS, RHS, DAG, dl, MVT::v8i16);
10705  EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
10706 
10707  // Multiply the odd 8-bit parts, producing 16-bit sums.
10708  SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
10709  LHS, RHS, DAG, dl, MVT::v8i16);
10710  OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
10711 
10712  // Merge the results together. Because vmuleub and vmuloub are
10713  // instructions with a big-endian bias, we must reverse the
10714  // element numbering and reverse the meaning of "odd" and "even"
10715  // when generating little endian code.
10716  int Ops[16];
10717  for (unsigned i = 0; i != 8; ++i) {
10718  if (isLittleEndian) {
10719  Ops[i*2 ] = 2*i;
10720  Ops[i*2+1] = 2*i+16;
10721  } else {
10722  Ops[i*2 ] = 2*i+1;
10723  Ops[i*2+1] = 2*i+1+16;
10724  }
10725  }
10726  if (isLittleEndian)
10727  return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
10728  else
10729  return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
10730  } else {
10731  llvm_unreachable("Unknown mul to lower!");
10732  }
10733 }
10734 
10735 SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
10736  bool IsStrict = Op->isStrictFPOpcode();
10737  if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
10738  !Subtarget.hasP9Vector())
10739  return SDValue();
10740 
10741  return Op;
10742 }
10743 
10744 // Custom lowering for fpext v2f32 to v2f64.
10745 SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
10746 
10747  assert(Op.getOpcode() == ISD::FP_EXTEND &&
10748  "Should only be called for ISD::FP_EXTEND");
10749 
10750  // FIXME: handle extends from half precision float vectors on P9.
10751  // We only want to custom lower an extend from v2f32 to v2f64.
10752  if (Op.getValueType() != MVT::v2f64 ||
10753  Op.getOperand(0).getValueType() != MVT::v2f32)
10754  return SDValue();
10755 
10756  SDLoc dl(Op);
10757  SDValue Op0 = Op.getOperand(0);
10758 
10759  switch (Op0.getOpcode()) {
10760  default:
10761  return SDValue();
10762  case ISD::EXTRACT_SUBVECTOR: {
10763  assert(Op0.getNumOperands() == 2 &&
10764  isa<ConstantSDNode>(Op0->getOperand(1)) &&
10765  "Node should have 2 operands with second one being a constant!");
10766 
10767  if (Op0.getOperand(0).getValueType() != MVT::v4f32)
10768  return SDValue();
10769 
10770  // Custom lower is only done for high or low doubleword.
10771  int Idx = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
10772  if (Idx % 2 != 0)
10773  return SDValue();
10774 
10775  // Since input is v4f32, at this point Idx is either 0 or 2.
10776  // Shift to get the doubleword position we want.
10777  int DWord = Idx >> 1;
10778 
10779  // High and low word positions are different on little endian.
10780  if (Subtarget.isLittleEndian())
10781  DWord ^= 0x1;
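// For example, EXTRACT_SUBVECTOR index 2 selects doubleword 1 on big endian
// but doubleword 0 on little endian.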
10782 
10783  return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
10784  Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
10785  }
10786  case ISD::FADD:
10787  case ISD::FMUL:
10788  case ISD::FSUB: {
10789  SDValue NewLoad[2];
10790  for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
10791  // Ensure both inputs are loads.
10792  SDValue LdOp = Op0.getOperand(i);
10793  if (LdOp.getOpcode() != ISD::LOAD)
10794  return SDValue();
10795  // Generate new load node.
10796  LoadSDNode *LD = cast<LoadSDNode>(LdOp);
10797  SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
10798  NewLoad[i] = DAG.getMemIntrinsicNode(
10799  PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
10800  LD->getMemoryVT(), LD->getMemOperand());
10801  }
10802  SDValue NewOp =
10803  DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
10804  NewLoad[1], Op0.getNode()->getFlags());
10805  return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
10806  DAG.getConstant(0, dl, MVT::i32));
10807  }
10808  case ISD::LOAD: {
10809  LoadSDNode *LD = cast<LoadSDNode>(Op0);
10810  SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
10811  SDValue NewLd = DAG.getMemIntrinsicNode(
10812  PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
10813  LD->getMemoryVT(), LD->getMemOperand());
10814  return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
10815  DAG.getConstant(0, dl, MVT::i32));
10816  }
10817  }
10818  llvm_unreachable("ERROR: Should have returned for all cases within switch.");
10819 }
10820 
10821 /// LowerOperation - Provide custom lowering hooks for some operations.
10822 ///
10823 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
10824  switch (Op.getOpcode()) {
10825  default: llvm_unreachable("Wasn't expecting to be able to lower this!");
10826  case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
10827  case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
10828  case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
10829  case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
10830  case ISD::JumpTable: return LowerJumpTable(Op, DAG);
10831  case ISD::STRICT_FSETCC:
10832  case ISD::STRICT_FSETCCS:
10833  case ISD::SETCC: return LowerSETCC(Op, DAG);
10834  case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
10835  case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
10836 
10837  case ISD::INLINEASM:
10838  case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
10839  // Variable argument lowering.
10840  case ISD::VASTART: return LowerVASTART(Op, DAG);
10841  case ISD::VAARG: return LowerVAARG(Op, DAG);
10842  case ISD::VACOPY: return LowerVACOPY(Op, DAG);
10843 
10844  case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
10845  case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
10846  case ISD::GET_DYNAMIC_AREA_OFFSET:
10847  return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
10848 
10849  // Exception handling lowering.
10850  case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
10851  case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
10852  case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
10853 
10854  case ISD::LOAD: return LowerLOAD(Op, DAG);
10855  case ISD::STORE: return LowerSTORE(Op, DAG);
10856  case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
10857  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
10858  case ISD::STRICT_FP_TO_UINT:
10859  case ISD::STRICT_FP_TO_SINT:
10860  case ISD::FP_TO_UINT:
10861  case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
10862  case ISD::STRICT_UINT_TO_FP:
10863  case ISD::STRICT_SINT_TO_FP:
10864  case ISD::UINT_TO_FP:
10865  case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
10866  case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
10867 
10868  // Lower 64-bit shifts.
10869  case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
10870  case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
10871  case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
10872 
10873  case ISD::FSHL: return LowerFunnelShift(Op, DAG);
10874  case ISD::FSHR: return LowerFunnelShift(Op, DAG);
10875 
10876  // Vector-related lowering.
10877  case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
10878  case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
10879  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
10880  case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
10881  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
10882  case ISD::MUL: return LowerMUL(Op, DAG);
10883  case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
10884  case ISD::STRICT_FP_ROUND:
10885  case ISD::FP_ROUND:
10886  return LowerFP_ROUND(Op, DAG);
10887  case ISD::ROTL: return LowerROTL(Op, DAG);
10888 
10889  // For counter-based loop handling.
10890  case ISD::INTRINSIC_W_CHAIN: return SDValue();
10891 
10892  case ISD::BITCAST: return LowerBITCAST(Op, DAG);
10893 
10894  // Frame & Return address.
10895  case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
10896  case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
10897 
10898  case ISD::INTRINSIC_VOID:
10899  return LowerINTRINSIC_VOID(Op, DAG);
10900  case ISD::BSWAP:
10901  return LowerBSWAP(Op, DAG);
10902  case ISD::ATOMIC_CMP_SWAP:
10903  return LowerATOMIC_CMP_SWAP(Op, DAG);
10904  }
10905 }
10906 
10907 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
10908  SmallVectorImpl<SDValue> &Results,
10909  SelectionDAG &DAG) const {
10910  SDLoc dl(N);
10911  switch (N->getOpcode()) {
10912  default:
10913  llvm_unreachable("Do not know how to custom type legalize this operation!");
10914  case ISD::READCYCLECOUNTER: {
10915  SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
10916  SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
10917 
10918  Results.push_back(
10919  DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
10920  Results.push_back(RTB.getValue(2));
10921  break;
10922  }
10923  case ISD::INTRINSIC_W_CHAIN: {
10924  if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
10925  Intrinsic::loop_decrement)
10926  break;
10927 
10928  assert(N->getValueType(0) == MVT::i1 &&
10929  "Unexpected result type for CTR decrement intrinsic");
10930  EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
10931  N->getValueType(0));
10932  SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
10933  SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
10934  N->getOperand(1));
10935 
10936  Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
10937  Results.push_back(NewInt.getValue(1));
10938  break;
10939  }
10940  case ISD::VAARG: {
10941  if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
10942  return;
10943 
10944  EVT VT = N->getValueType(0);
10945 
10946  if (VT == MVT::i64) {
10947  SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
10948 
10949  Results.push_back(NewNode);
10950  Results.push_back(NewNode.getValue(1));
10951  }
10952  return;
10953  }
10954  case ISD::STRICT_FP_TO_SINT:
10955  case ISD::STRICT_FP_TO_UINT:
10956  case ISD::FP_TO_SINT:
10957  case ISD::FP_TO_UINT:
10958  // LowerFP_TO_INT() can only handle f32 and f64.
10959  if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
10960  MVT::ppcf128)
10961  return;
10962  Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
10963  return;
10964  case ISD::TRUNCATE: {
10965  if (!N->getValueType(0).isVector())
10966  return;
10967  SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
10968  if (Lowered)
10969  Results.push_back(Lowered);
10970  return;
10971  }
10972  case ISD::FSHL:
10973  case ISD::FSHR:
10974  // Don't handle funnel shifts here.
10975  return;
10976  case ISD::BITCAST:
10977  // Don't handle bitcast here.
10978  return;
10979  case ISD::FP_EXTEND:
10980  SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
10981  if (Lowered)
10982  Results.push_back(Lowered);
10983  return;
10984  }
10985 }
10986 
10987 //===----------------------------------------------------------------------===//
10988 // Other Lowering Code
10989 //===----------------------------------------------------------------------===//
10990 
10991 static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
10992  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
10993  Function *Func = Intrinsic::getDeclaration(M, Id);
10994  return Builder.CreateCall(Func, {});
10995 }
10996 
10997 // The mappings for emitLeading/TrailingFence are taken from
10998 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
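// Roughly, under the cited mapping: a sequentially consistent store lowers to
// "sync; st" (the sync coming from the leading fence below), while an acquire
// or seq_cst load gets its trailing "cmp; bc; isync" sequence (modelled here
// as the ppc_cfence intrinsic on PPC64) from emitTrailingFence.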
10999 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
11000  Instruction *Inst,
11001  AtomicOrdering Ord) const {
11002  if (Ord == AtomicOrdering::SequentiallyConsistent)
11003  return callIntrinsic(Builder, Intrinsic::ppc_sync);
11004  if (isReleaseOrStronger(Ord))
11005  return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
11006  return nullptr;
11007 }
11008 
11009 Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
11010  Instruction *Inst,
11011  AtomicOrdering Ord) const {
11012  if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
11013  // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
11014  // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
11015  // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
11016  if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
11017  return Builder.CreateCall(
11018  Intrinsic::getDeclaration(
11019  Builder.GetInsertBlock()->getParent()->getParent(),
11020  Intrinsic::ppc_cfence, {Inst->getType()}),
11021  {Inst});
11022  // FIXME: Can use isync for rmw operation.
11023  return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
11024  }
11025  return nullptr;
11026 }
11027 
11028 MachineBasicBlock *
11029 PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
11030  unsigned AtomicSize,
11031  unsigned BinOpcode,
11032  unsigned CmpOpcode,
11033  unsigned CmpPred) const {
11034  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
11035  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11036 
11037  auto LoadMnemonic = PPC::LDARX;
11038  auto StoreMnemonic = PPC::STDCX;
11039  switch (AtomicSize) {
11040  default:
11041  llvm_unreachable("Unexpected size of atomic entity");
11042  case 1:
11043  LoadMnemonic = PPC::LBARX;
11044  StoreMnemonic = PPC::STBCX;
11045  assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
11046  break;
11047  case 2:
11048  LoadMnemonic = PPC::LHARX;
11049  StoreMnemonic = PPC::STHCX;
11050  assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
11051  break;
11052  case 4:
11053  LoadMnemonic = PPC::LWARX;
11054  StoreMnemonic = PPC::STWCX;
11055  break;
11056  case 8:
11057  LoadMnemonic = PPC::LDARX;
11058  StoreMnemonic = PPC::STDCX;
11059  break;
11060  }
11061 
11062  const BasicBlock *LLVM_BB = BB->getBasicBlock();
11063  MachineFunction *F = BB->getParent();
11064  MachineFunction::iterator It = ++BB->getIterator();
11065 
11066  Register dest = MI.getOperand(0).getReg();
11067  Register ptrA = MI.getOperand(1).getReg();
11068  Register ptrB = MI.getOperand(2).getReg();
11069  Register incr = MI.getOperand(3).getReg();
11070  DebugLoc dl = MI.getDebugLoc();
11071 
11072  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
11073  MachineBasicBlock *loop2MBB =
11074  CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
11075  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11076  F->insert(It, loopMBB);
11077  if (CmpOpcode)
11078  F->insert(It, loop2MBB);
11079  F->insert(It, exitMBB);
11080  exitMBB->splice(exitMBB->begin(), BB,
11081  std::next(MachineBasicBlock::iterator(MI)), BB->end());
11082  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11083 
11084  MachineRegisterInfo &RegInfo = F->getRegInfo();
11085  Register TmpReg = (!BinOpcode) ? incr :
11086  RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
11087  : &PPC::GPRCRegClass);
11088 
11089  // thisMBB:
11090  // ...
11091  // fallthrough --> loopMBB
11092  BB->addSuccessor(loopMBB);
11093 
11094  // loopMBB:
11095  // l[wd]arx dest, ptr
11096  // add r0, dest, incr
11097  // st[wd]cx. r0, ptr
11098  // bne- loopMBB
11099  // fallthrough --> exitMBB
11100 
11101  // For max/min...
11102  // loopMBB:
11103  // l[wd]arx dest, ptr
11104  // cmpl?[wd] incr, dest
11105  // bgt exitMBB
11106  // loop2MBB:
11107  // st[wd]cx. dest, ptr
11108  // bne- loopMBB
11109  // fallthrough --> exitMBB
11110 
11111  BB = loopMBB;
11112  BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
11113  .addReg(ptrA).addReg(ptrB);
11114  if (BinOpcode)
11115  BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
11116  if (CmpOpcode) {
11117  // Signed comparisons of byte or halfword values must be sign-extended.
11118  if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
11119  Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
11120  BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
11121  ExtReg).addReg(dest);
11122  BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11123  .addReg(incr).addReg(ExtReg);
11124  } else
11125  BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11126  .addReg(incr).addReg(dest);
11127 
11128  BuildMI(BB, dl, TII->get(PPC::BCC))
11129  .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
11130  BB->addSuccessor(loop2MBB);
11131  BB->addSuccessor(exitMBB);
11132  BB = loop2MBB;
11133  }
11134  BuildMI(BB, dl, TII->get(StoreMnemonic))
11135  .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
11136  BuildMI(BB, dl, TII->get(PPC::BCC))
11137  .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
11138  BB->addSuccessor(loopMBB);
11139  BB->addSuccessor(exitMBB);
11140 
11141  // exitMBB:
11142  // ...
11143  BB = exitMBB;
11144  return BB;
11145 }
11146 
11147 static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
11148  switch(MI.getOpcode()) {
11149  default:
11150  return false;
11151  case PPC::COPY:
11152  return TII->isSignExtended(MI);
11153  case PPC::LHA:
11154  case PPC::LHA8:
11155  case PPC::LHAU:
11156  case PPC::LHAU8:
11157  case PPC::LHAUX:
11158  case PPC::LHAUX8:
11159  case PPC::LHAX:
11160  case PPC::LHAX8:
11161  case PPC::LWA:
11162  case PPC::LWAUX:
11163  case PPC::LWAX:
11164  case PPC::LWAX_32:
11165  case PPC::LWA_32:
11166  case PPC::PLHA:
11167  case PPC::PLHA8:
11168  case PPC::PLHA8pc:
11169  case PPC::PLHApc:
11170  case PPC::PLWA:
11171  case PPC::PLWA8:
11172  case PPC::PLWA8pc:
11173  case PPC::PLWApc:
11174  case PPC::EXTSB:
11175  case PPC::EXTSB8:
11176  case PPC::EXTSB8_32_64:
11177  case PPC::EXTSB8_rec:
11178  case PPC::EXTSB_rec:
11179  case PPC::EXTSH:
11180  case PPC::EXTSH8:
11181  case PPC::EXTSH8_32_64:
11182  case PPC::EXTSH8_rec:
11183  case PPC::EXTSH_rec:
11184  case PPC::EXTSW:
11185  case PPC::EXTSWSLI:
11186  case PPC::EXTSWSLI_32_64:
11187  case PPC::EXTSWSLI_32_64_rec:
11188  case PPC::EXTSWSLI_rec:
11189  case PPC::EXTSW_32:
11190  case PPC::EXTSW_32_64:
11191  case PPC::EXTSW_32_64_rec:
11192  case PPC::EXTSW_rec:
11193  case PPC::SRAW:
11194  case PPC::SRAWI:
11195  case PPC::SRAWI_rec:
11196  case PPC::SRAW_rec:
11197  return true;
11198  }
11199  return false;
11200 }
11201 
11202 MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
11203  MachineInstr &MI, MachineBasicBlock *BB,
11204  bool is8bit, // operation
11205  unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
11206  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
11207  const PPCInstrInfo *TII = Subtarget.getInstrInfo();
11208 
11209  // If this is a signed comparison and the value being compared is not known
11210  // to be sign extended, sign extend it here.
11211  DebugLoc dl = MI.getDebugLoc();
11212  MachineFunction *F = BB->getParent();
11213  MachineRegisterInfo &RegInfo = F->getRegInfo();
11214  Register incr = MI.getOperand(3).getReg();
11215  bool IsSignExtended = Register::isVirtualRegister(incr) &&
11216  isSignExtended(*RegInfo.getVRegDef(incr), TII);
11217 
11218  if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
11219  Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
11220  BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
11221  .addReg(MI.getOperand(3).getReg());
11222  MI.getOperand(3).setReg(ValueReg);
11223  }
11224  // If we support part-word atomic mnemonics, just use them
11225  if (Subtarget.hasPartwordAtomics())
11226  return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
11227  CmpPred);
11228 
11229  // In 64 bit mode we have to use 64 bits for addresses, even though the
11230  // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
11231  // registers without caring whether they're 32 or 64, but here we're
11232  // doing actual arithmetic on the addresses.
11233  bool is64bit = Subtarget.isPPC64();
11234  bool isLittleEndian = Subtarget.isLittleEndian();
11235  unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
11236 
11237  const BasicBlock *LLVM_BB = BB->getBasicBlock();
11238  MachineFunction::iterator It = ++BB->getIterator();
11239 
11240  Register dest = MI.getOperand(0).getReg();
11241  Register ptrA = MI.getOperand(1).getReg();
11242  Register ptrB = MI.getOperand(2).getReg();
11243 
11244  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
11245  MachineBasicBlock *loop2MBB =
11246  CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
11247  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11248  F->insert(It, loopMBB);
11249  if (CmpOpcode)
11250  F->insert(It, loop2MBB);
11251  F->insert(It, exitMBB);
11252  exitMBB->splice(exitMBB->begin(), BB,
11253  std::next(MachineBasicBlock::iterator(MI)), BB->end());
11254  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11255 
11256  const TargetRegisterClass *RC =
11257  is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11258  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
11259 
11260  Register PtrReg = RegInfo.createVirtualRegister(RC);
11261  Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
11262  Register ShiftReg =
11263  isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
11264  Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
11265  Register MaskReg = RegInfo.createVirtualRegister(GPRC);
11266  Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
11267  Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
11268  Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
11269  Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
11270  Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
11271  Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
11272  Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
11273  Register Ptr1Reg;
11274  Register TmpReg =
11275  (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
11276 
11277  // thisMBB:
11278  // ...
11279  // fallthrough --> loopMBB
11280  BB->addSuccessor(loopMBB);
11281 
11282  // The 4-byte load must be aligned, while a char or short may be
11283  // anywhere in the word. Hence all this nasty bookkeeping code.
11284  // add ptr1, ptrA, ptrB [copy if ptrA==0]
11285  // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
11286  // xori shift, shift1, 24 [16]
11287  // rlwinm ptr, ptr1, 0, 0, 29
11288  // slw incr2, incr, shift
11289  // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
11290  // slw mask, mask2, shift
11291  // loopMBB:
11292  // lwarx tmpDest, ptr
11293  // add tmp, tmpDest, incr2
11294  // andc tmp2, tmpDest, mask
11295  // and tmp3, tmp, mask
11296  // or tmp4, tmp3, tmp2
11297  // stwcx. tmp4, ptr
11298  // bne- loopMBB
11299  // fallthrough --> exitMBB
11300  // srw SrwDest, tmpDest, shift
11301  // rlwinm SrwDest, SrwDest, 0, 24 [16], 31
11302  if (ptrA != ZeroReg) {
11303  Ptr1Reg = RegInfo.createVirtualRegister(RC);
11304  BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
11305  .addReg(ptrA)
11306  .addReg(ptrB);
11307  } else {
11308  Ptr1Reg = ptrB;
11309  }
11310  // We need to use a 32-bit subregister to avoid a register class mismatch in
11311  // 64-bit mode.
11312  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
11313  .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
11314  .addImm(3)
11315  .addImm(27)
11316  .addImm(is8bit ? 28 : 27);
11317  if (!isLittleEndian)
11318  BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
11319  .addReg(Shift1Reg)
11320  .addImm(is8bit ? 24 : 16);
11321  if (is64bit)
11322  BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
11323  .addReg(Ptr1Reg)
11324  .addImm(0)
11325  .addImm(61);
11326  else
11327  BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
11328  .addReg(Ptr1Reg)
11329  .addImm(0)
11330  .addImm(0)
11331  .addImm(29);
11332  BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
11333  if (is8bit)
11334  BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
11335  else {
11336  BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
11337  BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
11338  .addReg(Mask3Reg)
11339  .addImm(65535);
11340  }
11341  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
11342  .addReg(Mask2Reg)
11343  .addReg(ShiftReg);
11344 
11345  BB = loopMBB;
11346  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
11347  .addReg(ZeroReg)
11348  .addReg(PtrReg);
11349  if (BinOpcode)
11350  BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
11351  .addReg(Incr2Reg)
11352  .addReg(TmpDestReg);
11353  BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
11354  .addReg(TmpDestReg)
11355  .addReg(MaskReg);
11356  BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
11357  if (CmpOpcode) {
11358  // For unsigned comparisons, we can directly compare the shifted values.
11359  // For signed comparisons we shift and sign extend.
11360  Register SReg = RegInfo.createVirtualRegister(GPRC);
11361  BuildMI(BB, dl, TII->get(PPC::AND), SReg)
11362  .addReg(TmpDestReg)
11363  .addReg(MaskReg);
11364  unsigned ValueReg = SReg;
11365  unsigned CmpReg = Incr2Reg;
11366  if (CmpOpcode == PPC::CMPW) {
11367  ValueReg = RegInfo.createVirtualRegister(GPRC);
11368  BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
11369  .addReg(SReg)
11370  .addReg(ShiftReg);
11371  Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
11372  BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
11373  .addReg(ValueReg);
11374  ValueReg = ValueSReg;
11375  CmpReg = incr;
11376  }
11377  BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11378  .addReg(CmpReg)
11379  .addReg(ValueReg);
11380  BuildMI(BB, dl, TII->get(PPC::BCC))
11381  .addImm(CmpPred)
11382  .addReg(PPC::CR0)
11383  .addMBB(exitMBB);
11384  BB->addSuccessor(loop2MBB);
11385  BB->addSuccessor(exitMBB);
11386  BB = loop2MBB;
11387  }
11388  BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
11389  BuildMI(BB, dl, TII->get(PPC::STWCX))
11390  .addReg(Tmp4Reg)
11391  .addReg(ZeroReg)
11392  .addReg(PtrReg);
11393  BuildMI(BB, dl, TII->get(PPC::BCC))
11394  .addImm(PPC::PRED_NE)
11395  .addReg(PPC::CR0)
11396  .addMBB(loopMBB);
11397  BB->addSuccessor(loopMBB);
11398  BB->addSuccessor(exitMBB);
11399 
11400  // exitMBB:
11401  // ...
11402  BB = exitMBB;
11403  // Since the shift amount is not a constant, we need to clear
11404  // the upper bits with a separate RLWINM.
11405  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
11406  .addReg(SrwDestReg)
11407  .addImm(0)
11408  .addImm(is8bit ? 24 : 16)
11409  .addImm(31);
11410  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
11411  .addReg(TmpDestReg)
11412  .addReg(ShiftReg);
11413  return BB;
11414 }
11415 
11416 MachineBasicBlock *
11417 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
11418  MachineBasicBlock *MBB) const {
11419  DebugLoc DL = MI.getDebugLoc();
11420  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11421  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
11422 
11423  MachineFunction *MF = MBB->getParent();
11424  MachineRegisterInfo &MRI = MF->getRegInfo();
11425 
11426  const BasicBlock *BB = MBB->getBasicBlock();
11427  MachineFunction::iterator I = ++MBB->getIterator();
11428 
11429  Register DstReg = MI.getOperand(0).getReg();
11430  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
11431  assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
11432  Register mainDstReg = MRI.createVirtualRegister(RC);
11433  Register restoreDstReg = MRI.createVirtualRegister(RC);
11434 
11435  MVT PVT = getPointerTy(MF->getDataLayout());
11436  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
11437  "Invalid Pointer Size!");
11438  // For v = setjmp(buf), we generate
11439  //
11440  // thisMBB:
11441  // SjLjSetup mainMBB
11442  // bl mainMBB
11443  // v_restore = 1
11444  // b sinkMBB
11445  //
11446  // mainMBB:
11447  // buf[LabelOffset] = LR
11448  // v_main = 0
11449  //
11450  // sinkMBB:
11451  // v = phi(main, restore)
11452  //
11453 
11454  MachineBasicBlock *thisMBB = MBB;
11455  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
11456  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
11457  MF->insert(I, mainMBB);
11458  MF->insert(I, sinkMBB);
11459 
11460  MachineInstrBuilder MIB;
11461 
11462  // Transfer the remainder of BB and its successor edges to sinkMBB.
11463  sinkMBB->splice(sinkMBB->begin(), MBB,
11464  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11465  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
11466 
11467  // Note that the structure of the jmp_buf used here is not compatible
11468  // with that used by libc, and is not designed to be. Specifically, it
11469  // stores only those 'reserved' registers that LLVM does not otherwise
11470  // understand how to spill. Also, by convention, by the time this
11471  // intrinsic is called, Clang has already stored the frame address in the
11472  // first slot of the buffer and stack address in the third. Following the
11473  // X86 target code, we'll store the jump address in the second slot. We also
11474  // need to save the TOC pointer (R2) to handle jumps between shared
11475  // libraries, and that will be stored in the fourth slot. The thread
11476  // identifier (R13) is not affected.
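  // In units of PVT.getStoreSize() (8 bytes on PPC64), the layout used here
  // works out to roughly:
  //   buf[0] = frame address (stored by the front end)
  //   buf[1] = jump address   (LabelOffset)
  //   buf[2] = stack address  (SPOffset, reloaded in emitEHSjLjLongJmp)
  //   buf[3] = TOC pointer/R2 (TOCOffset)
  //   buf[4] = base pointer   (BPOffset)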
11477 
11478  // thisMBB:
11479  const int64_t LabelOffset = 1 * PVT.getStoreSize();
11480  const int64_t TOCOffset = 3 * PVT.getStoreSize();
11481  const int64_t BPOffset = 4 * PVT.getStoreSize();
11482 
11483  // Prepare the IP in a register.
11484  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
11485  Register LabelReg = MRI.createVirtualRegister(PtrRC);
11486  Register BufReg = MI.getOperand(1).getReg();
11487 
11488  if (Subtarget.is64BitELFABI()) {
11489  setUsesTOCBasePtr(*MBB->getParent());
11490  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
11491  .addReg(PPC::X2)
11492  .addImm(TOCOffset)
11493  .addReg(BufReg)
11494  .cloneMemRefs(MI);
11495  }
11496 
11497  // Naked functions never have a base pointer, and so we use r1. For all
11498  // other functions, this decision must be deferred until PEI.
11499  unsigned BaseReg;
11500  if (MF->getFunction().hasFnAttribute(Attribute::Naked))
11501  BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
11502  else
11503  BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
11504 
11505  MIB = BuildMI(*thisMBB, MI, DL,
11506  TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
11507  .addReg(BaseReg)
11508  .addImm(BPOffset)
11509  .addReg(BufReg)
11510  .cloneMemRefs(MI);
11511 
11512  // Setup
11513  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
11514  MIB.addRegMask(TRI->getNoPreservedMask());
11515 
11516  BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
11517 
11518  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
11519  .addMBB(mainMBB);
11520  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
11521 
11522  thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
11523  thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
11524 
11525  // mainMBB:
11526  // mainDstReg = 0
11527  MIB =
11528  BuildMI(mainMBB, DL,
11529  TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
11530 
11531  // Store IP
11532  if (Subtarget.isPPC64()) {
11533  MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
11534  .addReg(LabelReg)
11535  .addImm(LabelOffset)
11536  .addReg(BufReg);
11537  } else {
11538  MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
11539  .addReg(LabelReg)
11540  .addImm(LabelOffset)
11541  .addReg(BufReg);
11542  }
11543  MIB.cloneMemRefs(MI);
11544 
11545  BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
11546  mainMBB->addSuccessor(sinkMBB);
11547 
11548  // sinkMBB:
11549  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
11550  TII->get(PPC::PHI), DstReg)
11551  .addReg(mainDstReg).addMBB(mainMBB)
11552  .addReg(restoreDstReg).addMBB(thisMBB);
11553 
11554  MI.eraseFromParent();
11555  return sinkMBB;
11556 }
11557 
11558 MachineBasicBlock *
11559 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
11560  MachineBasicBlock *MBB) const {
11561  DebugLoc DL = MI.getDebugLoc();
11562  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11563 
11564  MachineFunction *MF = MBB->getParent();
11565  MachineRegisterInfo &MRI = MF->getRegInfo();
11566 
11567  MVT PVT = getPointerTy(MF->getDataLayout());
11568  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
11569  "Invalid Pointer Size!");
11570 
11571  const TargetRegisterClass *RC =
11572  (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11573  Register Tmp = MRI.createVirtualRegister(RC);
11574  // Since FP is only updated here but NOT referenced, it's treated as GPR.
11575  unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
11576  unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
11577  unsigned BP =
11578  (PVT == MVT::i64)
11579  ? PPC::X30
11580  : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
11581  : PPC::R30);
11582 
11583  MachineInstrBuilder MIB;
11584 
11585  const int64_t LabelOffset = 1 * PVT.getStoreSize();
11586  const int64_t SPOffset = 2 * PVT.getStoreSize();
11587  const int64_t TOCOffset = 3 * PVT.getStoreSize();
11588  const int64_t BPOffset = 4 * PVT.getStoreSize();
11589 
11590  Register BufReg = MI.getOperand(0).getReg();
11591 
11592  // Reload FP (the jumped-to function may not have had a
11593  // frame pointer, and if so, then its r31 will be restored
11594  // as necessary).
11595  if (PVT == MVT::i64) {
11596  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
11597  .addImm(0)
11598  .addReg(BufReg);
11599  } else {
11600  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
11601  .addImm(0)
11602  .addReg(BufReg);
11603  }
11604  MIB.cloneMemRefs(MI);
11605 
11606  // Reload IP
11607  if (PVT == MVT::i64) {
11608  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
11609  .addImm(LabelOffset)
11610  .addReg(BufReg);
11611  } else {
11612  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
11613  .addImm(LabelOffset)
11614  .addReg(BufReg);
11615  }
11616  MIB.cloneMemRefs(MI);
11617 
11618  // Reload SP
11619  if (PVT == MVT::i64) {
11620  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
11621  .addImm(SPOffset)
11622  .addReg(BufReg);
11623  } else {
11624  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
11625  .addImm(SPOffset)
11626  .addReg(BufReg);
11627  }
11628  MIB.cloneMemRefs(MI);
11629 
11630  // Reload BP
11631  if (PVT == MVT::i64) {
11632  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
11633  .addImm(BPOffset)
11634  .addReg(BufReg);
11635  } else {
11636  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
11637  .addImm(BPOffset)
11638  .addReg(BufReg);
11639  }
11640  MIB.cloneMemRefs(MI);
11641 
11642  // Reload TOC
11643  if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
11644  setUsesTOCBasePtr(*MBB->getParent());
11645  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
11646  .addImm(TOCOffset)
11647  .addReg(BufReg)
11648  .cloneMemRefs(MI);
11649  }
11650 
11651  // Jump
11652  BuildMI(*MBB, MI, DL,
11653  TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
11654  BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
11655 
11656  MI.eraseFromParent();
11657  return MBB;
11658 }
11659 
11660 bool PPCTargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
11661  // If the function specifically requests inline stack probes, emit them.
11662  if (MF.getFunction().hasFnAttribute("probe-stack"))
11663  return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
11664  "inline-asm";
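  // (For instance, front ends typically set "probe-stack"="inline-asm" when
  // -fstack-clash-protection is enabled.)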
11665  return false;
11666 }
11667 
11668 unsigned PPCTargetLowering::getStackProbeSize(MachineFunction &MF) const {
11669  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
11670  unsigned StackAlign = TFI->getStackAlignment();
11671  assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
11672  "Unexpected stack alignment");
11673  // The default stack probe size is 4096 if the function has no
11674  // stack-probe-size attribute.
11675  unsigned StackProbeSize = 4096;
11676  const Function &Fn = MF.getFunction();
11677  if (Fn.hasFnAttribute("stack-probe-size"))
11678  Fn.getFnAttribute("stack-probe-size")
11679  .getValueAsString()
11680  .getAsInteger(0, StackProbeSize);
11681  // Round down to the stack alignment.
11682  StackProbeSize &= ~(StackAlign - 1);
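  // For example, a requested "stack-probe-size" of 1000 with a 16-byte stack
  // alignment is rounded down here to 992.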
11683  return StackProbeSize ? StackProbeSize : StackAlign;
11684 }
11685 
11686 // Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
11687 // into three phases. In the first phase, it uses the pseudo instruction
11688 // PREPARE_PROBED_ALLOCA to get the future result of the actual FramePointer and
11689 // FinalStackPtr. In the second phase, it generates a loop that probes blocks.
11690 // Finally, it uses the pseudo instruction DYNAREAOFFSET to get the future result
11691 // of MaxCallFrameSize so that it can compute the correct data area pointer.
11692 MachineBasicBlock *
11693 PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
11694  MachineBasicBlock *MBB) const {
11695  const bool isPPC64 = Subtarget.isPPC64();
11696  MachineFunction *MF = MBB->getParent();
11697  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11698  DebugLoc DL = MI.getDebugLoc();
11699  const unsigned ProbeSize = getStackProbeSize(*MF);
11700  const BasicBlock *ProbedBB = MBB->getBasicBlock();
11701  MachineRegisterInfo &MRI = MF->getRegInfo();
11702  // The CFG of the probing loop looks like this:
11703  // +-----+
11704  // | MBB |
11705  // +--+--+
11706  // |
11707  // +----v----+
11708  // +--->+ TestMBB +---+
11709  // | +----+----+ |
11710  // | | |
11711  // | +-----v----+ |
11712  // +---+ BlockMBB | |
11713  // +----------+ |
11714  // |
11715  // +---------+ |
11716  // | TailMBB +<--+
11717  // +---------+
11718  // In MBB, calculate previous frame pointer and final stack pointer.
11719  // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
11720  // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
11721  // TailMBB is spliced via \p MI.
11722  MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
11723  MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
11724  MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
11725 
11726  MachineFunction::iterator MBBIter = ++MBB->getIterator();
11727  MF->insert(MBBIter, TestMBB);
11728  MF->insert(MBBIter, BlockMBB);
11729  MF->insert(MBBIter, TailMBB);
11730 
11731  const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
11732  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
11733 
11734  Register DstReg = MI.getOperand(0).getReg();
11735  Register NegSizeReg = MI.getOperand(1).getReg();
11736  Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
11737  Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11738  Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11739  Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11740 
11741  // Since the value of NegSizeReg might be realigned during prologue/epilogue
11742  // insertion, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get the
11743  // actual FramePointer and NegSize.
11744  unsigned ProbeOpc;
11745  if (!MRI.hasOneNonDBGUse(NegSizeReg))
11746  ProbeOpc =
11747  isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
11748  else
11749  // With the _NEGSIZE_SAME_REG variants, ActualNegSizeReg and NegSizeReg are
11750  // allocated to the same physical register, avoiding a redundant copy when
11751  // NegSizeReg has only one use (the current MI, which is about to be
11752  // replaced by PREPARE_PROBED_ALLOCA).
11753  ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
11754  : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
11755  BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
11756  .addDef(ActualNegSizeReg)
11757  .addReg(NegSizeReg)
11758  .add(MI.getOperand(2))
11759  .add(MI.getOperand(3));
11760 
11761  // Calculate the final stack pointer, which equals SP + ActualNegSize.
11762  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
11763  FinalStackPtr)
11764  .addReg(SPReg)
11765  .addReg(ActualNegSizeReg);
11766 
11767  // Materialize a scratch register for update.
11768  int64_t NegProbeSize = -(int64_t)ProbeSize;
11769  assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
11770  Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11771  if (!isInt<16>(NegProbeSize)) {
11772  Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11773  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
11774  .addImm(NegProbeSize >> 16);
11775  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
11776  ScratchReg)
11777  .addReg(TempReg)
11778  .addImm(NegProbeSize & 0xFFFF);
11779  } else
11780  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
11781  .addImm(NegProbeSize);
11782 
11783  {
11784  // Probe the leading residual part.
11785  Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11786  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
11787  .addReg(ActualNegSizeReg)
11788  .addReg(ScratchReg);
11789  Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11790  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
11791  .addReg(Div)
11792  .addReg(ScratchReg);
11793  Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11794  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
11795  .addReg(Mul)
11796  .addReg(ActualNegSizeReg);
11797  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
11798  .addReg(FramePointer)
11799  .addReg(SPReg)
11800  .addReg(NegMod);
11801  }
11802 
11803  {
11804  // The remaining part should be a multiple of ProbeSize.
11805  Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
11806  BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
11807  .addReg(SPReg)
11808  .addReg(FinalStackPtr);
11809  BuildMI(TestMBB, DL, TII->get(PPC::BCC))
11810  .addImm(PPC::PRED_EQ)
11811  .addReg(CmpResult)
11812  .addMBB(TailMBB);
11813  TestMBB->addSuccessor(BlockMBB);
11814  TestMBB->addSuccessor(TailMBB);
11815  }
11816 
11817  {
11818  // Touch the block.
11819  // |P...|P...|P...
11820  BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
11821  .addReg(FramePointer)
11822  .addReg(SPReg)
11823  .addReg(ScratchReg);
11824  BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
11825  BlockMBB->addSuccessor(TestMBB);
11826  }
11827 
11828  // Calculation of MaxCallFrameSize is deferred to prologue/epilogue insertion,
11829  // so use the DYNAREAOFFSET pseudo instruction to get the future result.
11830  Register MaxCallFrameSizeReg =
11831  MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11832  BuildMI(TailMBB, DL,
11833  TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
11834  MaxCallFrameSizeReg)
11835  .add(MI.getOperand(2))
11836  .add(MI.getOperand(3));
11837  BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
11838  .addReg(SPReg)
11839  .addReg(MaxCallFrameSizeReg);
11840 
11841  // Splice instructions after MI to TailMBB.
11842  TailMBB->splice(TailMBB->end(), MBB,
11843  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11844  TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
11845  MBB->addSuccessor(TestMBB);
11846 
11847  // Delete the pseudo instruction.
11848  MI.eraseFromParent();
11849 
11850  ++NumDynamicAllocaProbed;
11851  return TailMBB;
11852 }
11853 
11854 MachineBasicBlock *
11855 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
11856  MachineBasicBlock *BB) const {
11857  if (MI.getOpcode() == TargetOpcode::STACKMAP ||
11858  MI.getOpcode() == TargetOpcode::PATCHPOINT) {
11859  if (Subtarget.is64BitELFABI() &&
11860  MI.getOpcode() == TargetOpcode::PATCHPOINT &&
11861  !Subtarget.isUsingPCRelativeCalls()) {
11862  // Call lowering should have added an r2 operand to indicate a dependence
11863  // on the TOC base pointer value. It can't, however, because there is no
11864  // way to mark the dependence as implicit there, and so the stackmap code
11865  // will confuse it with a regular operand. Instead, add the dependence
11866  // here.
11867  MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
11868  }
11869 
11870  return emitPatchPoint(MI, BB);
11871  }
11872 
11873  if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
11874  MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
11875  return emitEHSjLjSetJmp(MI, BB);
11876  } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
11877  MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
11878  return emitEHSjLjLongJmp(MI, BB);
11879  }
11880 
11881  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11882 
11883  // To "insert" these instructions we actually have to insert their
11884  // control-flow patterns.
11885  const BasicBlock *LLVM_BB = BB->getBasicBlock();
11886  MachineFunction::iterator It = ++BB->getIterator();
11887 
11888  MachineFunction *F = BB->getParent();
11889 
11890  if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
11891  MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
11892  MI.getOpcode() == PPC::SELECT_I8) {
11893  SmallVector<MachineOperand, 2> Cond;
11894  if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
11895  MI.getOpcode() == PPC::SELECT_CC_I8)
11896  Cond.push_back(MI.getOperand(4));
11897  else
11898  Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
11899  Cond.push_back(MI.getOperand(1));
11900 
11901  DebugLoc dl = MI.getDebugLoc();
11902  TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
11903  MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
11904  } else if (MI.getOpcode() == PPC::SELECT_CC_F4 ||
11905  MI.getOpcode() == PPC::SELECT_CC_F8 ||
11906  MI.getOpcode() == PPC::SELECT_CC_F16 ||
11907  MI.getOpcode() == PPC::SELECT_CC_VRRC ||
11908  MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
11909  MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
11910  MI.getOpcode() == PPC::SELECT_CC_VSRC ||
11911  MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
11912  MI.getOpcode() == PPC::SELECT_CC_SPE ||
11913  MI.getOpcode() == PPC::SELECT_F4 ||
11914  MI.getOpcode() == PPC::SELECT_F8 ||
11915  MI.getOpcode() == PPC::SELECT_F16 ||
11916  MI.getOpcode() == PPC::SELECT_SPE ||
11917  MI.getOpcode() == PPC::SELECT_SPE4 ||
11918  MI.getOpcode() == PPC::SELECT_VRRC ||
11919  MI.getOpcode() == PPC::SELECT_VSFRC ||
11920  MI.getOpcode() == PPC::SELECT_VSSRC ||
11921  MI.getOpcode() == PPC::SELECT_VSRC) {
11922  // The incoming instruction knows the destination vreg to set, the
11923  // condition code register to branch on, the true/false values to
11924  // select between, and a branch opcode to use.
11925 
11926  // thisMBB:
11927  // ...
11928  // TrueVal = ...
11929  // cmpTY ccX, r1, r2
11930  // bCC copy1MBB
11931  // fallthrough --> copy0MBB
11932  MachineBasicBlock *thisMBB = BB;
11933  MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
11934  MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
11935  DebugLoc dl = MI.getDebugLoc();
11936  F->insert(It, copy0MBB);
11937  F->insert(It, sinkMBB);
11938 
11939  // Transfer the remainder of BB and its successor edges to sinkMBB.
11940  sinkMBB->splice(sinkMBB->begin(), BB,
11941  std::next(MachineBasicBlock::iterator(MI)), BB->end());
11942  sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
11943 
11944  // Next, add the true and fallthrough blocks as its successors.
11945  BB->addSuccessor(copy0MBB);
11946  BB->addSuccessor(sinkMBB);
11947 
11948  if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
11949  MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
11950  MI.getOpcode() == PPC::SELECT_F16 ||
11951  MI.getOpcode() == PPC::SELECT_SPE4 ||
11952  MI.getOpcode() == PPC::SELECT_SPE ||
11953  MI.getOpcode() == PPC::SELECT_VRRC ||
11954  MI.getOpcode() == PPC::SELECT_VSFRC ||
11955  MI.getOpcode() == PPC::SELECT_VSSRC ||
11956  MI.getOpcode() == PPC::SELECT_VSRC) {
11957  BuildMI(BB, dl, TII->get(PPC::BC))
11958  .addReg(MI.getOperand(1).getReg())
11959  .addMBB(sinkMBB);
11960  } else {
11961  unsigned SelectPred = MI.getOperand(4).getImm();
11962  BuildMI(BB, dl, TII->get(PPC::BCC))
11963  .addImm(SelectPred)
11964  .addReg(MI.getOperand(1).getReg())
11965  .addMBB(sinkMBB);
11966  }
11967 
11968  // copy0MBB:
11969  // %FalseValue = ...
11970  // # fallthrough to sinkMBB
11971  BB = copy0MBB;
11972 
11973  // Update machine-CFG edges
11974  BB->addSuccessor(sinkMBB);
11975 
11976  // sinkMBB:
11977  // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
11978  // ...
11979  BB = sinkMBB;
11980  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
11981  .addReg(MI.getOperand(3).getReg())
11982  .addMBB(copy0MBB)
11983  .addReg(MI.getOperand(2).getReg())
11984  .addMBB(thisMBB);
11985  } else if (MI.getOpcode() == PPC::ReadTB) {
11986  // To read the 64-bit time-base register on a 32-bit target, we read the
11987  // two halves. Should the counter have wrapped while it was being read, we
11988  // need to try again.
11989  // ...
11990  // readLoop:
11991  // mfspr Rx,TBU # load from TBU
11992  // mfspr Ry,TB # load from TB
11993  // mfspr Rz,TBU # load from TBU
11994  // cmpw crX,Rx,Rz # check if 'old'='new'
11995  // bne readLoop # branch if they're not equal
11996  // ...
11997 
11998  MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
11999  MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12000  DebugLoc dl = MI.getDebugLoc();
12001  F->insert(It, readMBB);
12002  F->insert(It, sinkMBB);
12003 
12004  // Transfer the remainder of BB and its successor edges to sinkMBB.
12005  sinkMBB->splice(sinkMBB->begin(), BB,
12006  std::next(MachineBasicBlock::iterator(MI)), BB->end());
12007  sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
12008 
12009  BB->addSuccessor(readMBB);
12010  BB = readMBB;
12011 
12012  MachineRegisterInfo &RegInfo = F->getRegInfo();
12013  Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12014  Register LoReg = MI.getOperand(0).getReg();
12015  Register HiReg = MI.getOperand(1).getReg();
12016 
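  // SPR 269 is the time-base upper half (TBU) and SPR 268 the lower half (TB).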
12017  BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
12018  BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
12019  BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
12020 
12021  Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12022 
12023  BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
12024  .addReg(HiReg)
12025  .addReg(ReadAgainReg);
12026  BuildMI(BB, dl, TII->get(PPC::BCC))
12027  .addImm(PPC::PRED_NE)
12028  .addReg(CmpReg)
12029  .addMBB(readMBB);
12030 
12031  BB->addSuccessor(readMBB);
12032  BB->addSuccessor(sinkMBB);
12033  } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
12034  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
12035  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
12036  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
12037  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
12038  BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
12039  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
12040  BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
12041 
12042  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
12043  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
12044  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
12045  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
12046  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
12047  BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
12048  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
12049  BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
12050 
12051  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
12052  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
12053  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
12054  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
12055  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
12056  BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
12057  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
12058  BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
12059 
12060  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
12061  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
12062  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
12063  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
12064  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
12065  BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
12066  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
12067  BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
12068 
12069  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
12070  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
12071  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
12072  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
12073  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
12074  BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
12075  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
12076  BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
12077 
12078  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
12079  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
12080  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
12081  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
12082  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
12083  BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
12084  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
12085  BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
12086 
12087  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
12088  BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
12089  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
12090  BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
12091  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
12092  BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
12093  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
12094  BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);
12095 
12096  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
12097  BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
12098  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
12099  BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
12100  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
12101  BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
12102  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
12103  BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);
12104 
12105  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
12106  BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
12107  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
12108  BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
12109  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
12110  BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
12111  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
12112  BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);
12113 
12114  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
12115  BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
12116  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
12117  BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
12118  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
12119  BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
12120  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
12121  BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);
12122 
12123  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
12124  BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
12125  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
12126  BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
12127  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
12128  BB = EmitAtomicBinary(MI, BB, 4, 0);
12129  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
12130  BB = EmitAtomicBinary(MI, BB, 8, 0);
12131  else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
12132  MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
12133  (Subtarget.hasPartwordAtomics() &&
12134  MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
12135  (Subtarget.hasPartwordAtomics() &&
12136  MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
12137  bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
12138 
12139  auto LoadMnemonic = PPC::LDARX;
12140  auto StoreMnemonic = PPC::STDCX;
12141  switch (MI.getOpcode()) {
12142  default:
12143  llvm_unreachable("Compare and swap of unknown size");
12144  case PPC::ATOMIC_CMP_SWAP_I8:
12145  LoadMnemonic = PPC::LBARX;
12146  StoreMnemonic = PPC::STBCX;
12147  assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
12148  break;
12149  case PPC::ATOMIC_CMP_SWAP_I16:
12150  LoadMnemonic = PPC::LHARX;
12151  StoreMnemonic = PPC::STHCX;
12152  assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
12153  break;
12154  case PPC::ATOMIC_CMP_SWAP_I32:
12155  LoadMnemonic = PPC::LWARX;
12156  StoreMnemonic = PPC::STWCX;
12157  break;
12158  case PPC::ATOMIC_CMP_SWAP_I64:
12159  LoadMnemonic = PPC::LDARX;
12160  StoreMnemonic = PPC::STDCX;
12161  break;
12162  }
12163  Register dest = MI.getOperand(0).getReg();
12164  Register ptrA = MI.getOperand(1).getReg();
12165  Register ptrB = MI.getOperand(2).getReg();
12166  Register oldval = MI.getOperand(3).getReg();
12167  Register newval = MI.getOperand(4).getReg();
12168  DebugLoc dl = MI.getDebugLoc();
12169 
12170  MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
12171  MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
12172  MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
12173  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12174  F->insert(It, loop1MBB);
12175  F->insert(It, loop2MBB);
12176  F->insert(It, midMBB);
12177  F->insert(It, exitMBB);
12178  exitMBB->splice(exitMBB->begin(), BB,
12179  std::next(MachineBasicBlock::iterator(MI)), BB->end());
12180  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12181 
12182  // thisMBB:
12183  // ...
12184  // fallthrough --> loopMBB
12185  BB->addSuccessor(loop1MBB);
12186 
12187  // loop1MBB:
12188  // l[bhwd]arx dest, ptr
12189  // cmp[wd] dest, oldval
12190  // bne- midMBB
12191  // loop2MBB:
12192  // st[bhwd]cx. newval, ptr
12193  // bne- loopMBB
12194  // b exitBB
12195  // midMBB:
12196  // st[bhwd]cx. dest, ptr
12197  // exitBB:
12198  BB = loop1MBB;
12199  BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
12200  BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
12201  .addReg(oldval)
12202  .addReg(dest);
12203  BuildMI(BB, dl, TII->get(PPC::BCC))
12204  .addImm(PPC::PRED_NE)
12205  .addReg(PPC::CR0)
12206  .addMBB(midMBB);
12207  BB->addSuccessor(loop2MBB);
12208  BB->addSuccessor(midMBB);
12209 
12210  BB = loop2MBB;
12211  BuildMI(BB, dl, TII->get(StoreMnemonic))
12212  .addReg(newval)
12213  .addReg(ptrA)
12214  .addReg(ptrB);
12215  BuildMI(BB, dl, TII->get(PPC::BCC))
12216  .addImm(PPC::PRED_NE)
12217  .addReg(PPC::CR0)
12218  .addMBB(loop1MBB);
12219  BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
12220  BB->addSuccessor(loop1MBB);
12221  BB->addSuccessor(exitMBB);
12222 
12223  BB = midMBB;
12224  BuildMI(BB, dl, TII->get(StoreMnemonic))
12225  .addReg(dest)
12226  .addReg(ptrA)
12227  .addReg(ptrB);
12228  BB->addSuccessor(exitMBB);
12229 
12230  // exitMBB:
12231  // ...
12232  BB = exitMBB;
12233  } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
12234  MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
12235  // We must use 64-bit registers for addresses when targeting 64-bit,
12236  // since we're actually doing arithmetic on them. Other registers
12237  // can be 32-bit.
12238  bool is64bit = Subtarget.isPPC64();
12239  bool isLittleEndian = Subtarget.isLittleEndian();
12240  bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
12241 
12242  Register dest = MI.getOperand(0).getReg();
12243  Register ptrA = MI.getOperand(1).getReg();
12244  Register ptrB = MI.getOperand(2).getReg();
12245  Register oldval = MI.getOperand(3).getReg();
12246  Register newval = MI.getOperand(4).getReg();
12247  DebugLoc dl = MI.getDebugLoc();
12248 
12249  MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
12250  MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
12251  MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
12252  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12253  F->insert(It, loop1MBB);
12254  F->insert(It, loop2MBB);
12255  F->insert(It, midMBB);
12256  F->insert(It, exitMBB);
12257  exitMBB->splice(exitMBB->begin(), BB,
12258  std::next(MachineBasicBlock::iterator(MI)), BB->end());
12259  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12260 
12261  MachineRegisterInfo &RegInfo = F->getRegInfo();
12262  const TargetRegisterClass *RC =
12263  is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12264  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12265 
12266  Register PtrReg = RegInfo.createVirtualRegister(RC);
12267  Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
12268  Register ShiftReg =
12269  isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
12270  Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
12271  Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
12272  Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
12273  Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
12274  Register MaskReg = RegInfo.createVirtualRegister(GPRC);
12275  Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
12276  Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
12277  Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
12278  Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
12279  Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
12280  Register Ptr1Reg;
12281  Register TmpReg = RegInfo.createVirtualRegister(GPRC);
12282  Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
12283  // thisMBB:
12284  // ...
12285  // fallthrough --> loopMBB
12286  BB->addSuccessor(loop1MBB);
12287 
12288  // The 4-byte load must be aligned, while a char or short may be
12289  // anywhere in the word. Hence all this nasty bookkeeping code.
12290  // add ptr1, ptrA, ptrB [copy if ptrA==0]
12291  // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
12292  // xori shift, shift1, 24 [16]
12293  // rlwinm ptr, ptr1, 0, 0, 29
12294  // slw newval2, newval, shift
12295  // slw oldval2, oldval, shift
12296  // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
12297  // slw mask, mask2, shift
12298  // and newval3, newval2, mask
12299  // and oldval3, oldval2, mask
12300  // loop1MBB:
12301  // lwarx tmpDest, ptr
12302  // and tmp, tmpDest, mask
12303  // cmpw tmp, oldval3
12304  // bne- midMBB
12305  // loop2MBB:
12306  // andc tmp2, tmpDest, mask
12307  // or tmp4, tmp2, newval3
12308  // stwcx. tmp4, ptr
12309  // bne- loop1MBB
12310  // b exitBB
12311  // midMBB:
12312  // stwcx. tmpDest, ptr
12313  // exitBB:
12314  // srw dest, tmpDest, shift
12315  if (ptrA != ZeroReg) {
12316  Ptr1Reg = RegInfo.createVirtualRegister(RC);
12317  BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
12318  .addReg(ptrA)
12319  .addReg(ptrB);
12320  } else {
12321  Ptr1Reg = ptrB;
12322  }
12323 
12324  // We need to use a 32-bit subregister to avoid a register class mismatch in
12325  // 64-bit mode.
12326  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
12327  .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
12328  .addImm(3)
12329  .addImm(27)
12330  .addImm(is8bit ? 28 : 27);
12331  if (!isLittleEndian)
12332  BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
12333  .addReg(Shift1Reg)
12334  .addImm(is8bit ? 24 : 16);
12335  if (is64bit)
12336  BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
12337  .addReg(Ptr1Reg)
12338  .addImm(0)
12339  .addImm(61);
12340  else
12341  BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
12342  .addReg(Ptr1Reg)
12343  .addImm(0)
12344  .addImm(0)
12345  .addImm(29);
12346  BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
12347  .addReg(newval)
12348  .addReg(ShiftReg);
12349  BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
12350  .addReg(oldval)
12351  .addReg(ShiftReg);
12352  if (is8bit)
12353  BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
12354  else {
12355  BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
12356  BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
12357  .addReg(Mask3Reg)
12358  .addImm(65535);
12359  }
12360  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
12361  .addReg(Mask2Reg)
12362  .addReg(ShiftReg);
12363  BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
12364  .addReg(NewVal2Reg)
12365  .addReg(MaskReg);
12366  BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
12367  .addReg(OldVal2Reg)
12368  .addReg(MaskReg);
12369 
12370  BB = loop1MBB;
12371  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
12372  .addReg(ZeroReg)
12373  .addReg(PtrReg);
12374  BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
12375  .addReg(TmpDestReg)
12376  .addReg(MaskReg);
12377  BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
12378  .addReg(TmpReg)
12379  .addReg(OldVal3Reg);
12380  BuildMI(BB, dl, TII->get(PPC::BCC))
12381  .addImm(PPC::PRED_NE)
12382  .addReg(PPC::CR0)
12383  .addMBB(midMBB);
12384  BB->addSuccessor(loop2MBB);
12385  BB->addSuccessor(midMBB);
12386 
12387  BB = loop2MBB;
12388  BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
12389  .addReg(TmpDestReg)
12390  .addReg(MaskReg);
12391  BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
12392  .addReg(Tmp2Reg)
12393  .addReg(NewVal3Reg);
12394  BuildMI(BB, dl, TII->get(PPC::STWCX))
12395  .addReg(Tmp4Reg)
12396  .addReg(ZeroReg)
12397  .addReg(PtrReg);
12398  BuildMI(BB, dl, TII->get(PPC::BCC))
12399  .addImm(PPC::PRED_NE)
12400  .addReg(PPC::CR0)
12401  .addMBB(loop1MBB);
12402  BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
12403  BB->addSuccessor(loop1MBB);
12404  BB->addSuccessor(exitMBB);
12405 
12406  BB = midMBB;
12407  BuildMI(BB, dl, TII->get(PPC::STWCX))
12408  .addReg(TmpDestReg)
12409  .addReg(ZeroReg)
12410  .addReg(PtrReg);
12411  BB->addSuccessor(exitMBB);
12412 
12413  // exitMBB:
12414  // ...
12415  BB = exitMBB;
12416  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
12417  .addReg(TmpReg)
12418  .addReg(ShiftReg);
12419  } else if (MI.getOpcode() == PPC::FADDrtz) {
12420  // This pseudo performs an FADD with rounding mode temporarily forced
12421  // to round-to-zero. We emit this via custom inserter since the FPSCR
12422  // is not modeled at the SelectionDAG level.
12423  Register Dest = MI.getOperand(0).getReg();
12424  Register Src1 = MI.getOperand(1).getReg();
12425  Register Src2 = MI.getOperand(2).getReg();
12426  DebugLoc dl = MI.getDebugLoc();
12427 
12428  MachineRegisterInfo &RegInfo = F->getRegInfo();
12429  Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
12430 
12431  // Save FPSCR value.
12432  BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
12433 
12434  // Set rounding mode to round-to-zero.
12435  BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
12436  .addImm(31)
12437  .addReg(PPC::RM, RegState::ImplicitDefine);
12438 
12439  BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
12440  .addImm(30)
12441  .addReg(PPC::RM, RegState::ImplicitDefine);
12442 
12443  // Perform addition.
12444  auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
12445  .addReg(Src1)
12446  .addReg(Src2);
12447  if (MI.getFlag(MachineInstr::NoFPExcept))
12448  MIB.setMIFlag(MachineInstr::NoFPExcept);
12449 
12450  // Restore FPSCR value.
12451  BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
12452  } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
12453  MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
12454  MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
12455  MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
12456  unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
12457  MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
12458  ? PPC::ANDI8_rec
12459  : PPC::ANDI_rec;
12460  bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
12461  MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
12462 
12463  MachineRegisterInfo &RegInfo = F->getRegInfo();
12464  Register Dest = RegInfo.createVirtualRegister(
12465  Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
12466 
12467  DebugLoc Dl = MI.getDebugLoc();
12468  BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
12469  .addReg(MI.getOperand(1).getReg())
12470  .addImm(1);
12471  BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12472  MI.getOperand(0).getReg())
12473  .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
12474  } else if (MI.getOpcode() == PPC::TCHECK_RET) {
12475  DebugLoc Dl = MI.getDebugLoc();
12476  MachineRegisterInfo &RegInfo = F->getRegInfo();
12477  Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12478  BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
12479  BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12480  MI.getOperand(0).getReg())
12481  .addReg(CRReg);
12482  } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
12483  DebugLoc Dl = MI.getDebugLoc();
12484  unsigned Imm = MI.getOperand(1).getImm();
12485  BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
12486  BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12487  MI.getOperand(0).getReg())
12488  .addReg(PPC::CR0EQ);
12489  } else if (MI.getOpcode() == PPC::SETRNDi) {
12490  DebugLoc dl = MI.getDebugLoc();
12491  Register OldFPSCRReg = MI.getOperand(0).getReg();
12492 
12493  // Save FPSCR value.
12494  BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
12495 
12496  // The floating point rounding mode is in bits 62:63 of FPSCR, and has
12497  // the following settings:
12498  // 00 Round to nearest
12499  // 01 Round to 0
12500  // 10 Round to +inf
12501  // 11 Round to -inf
12502 
12503  // When the operand is an immediate, use the two least significant bits of
12504  // the immediate to set bits 62:63 of FPSCR.
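  // For example, an immediate of 1 (round toward zero) emits mtfsb1 31
  // followed by mtfsb0 30, leaving bits 62:63 set to 0b01.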
12505  unsigned Mode = MI.getOperand(1).getImm();
12506  BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
12507  .addImm(31)
12508  .addReg(PPC::RM, RegState::ImplicitDefine);
12509 
12510  BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
12511  .addImm(30)
12512  .addReg(PPC::RM, RegState::ImplicitDefine);
12513  } else if (MI.getOpcode() == PPC::SETRND) {
12514  DebugLoc dl = MI.getDebugLoc();
12515 
12516  // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
12517  // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
12518  // If the target doesn't have DirectMove, we go through the stack to do the
12519  // conversion, because the target has no instructions like mtvsrd or mfvsrd
12520  // to do the conversion directly.
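  // Roughly, without direct moves an F8RC -> G8RC copy becomes
  //   stfd SrcReg, slot ; ld DestReg, slot
  // and the opposite direction uses std followed by lfd.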
12521  auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
12522  if (Subtarget.hasDirectMove()) {
12523  BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
12524  .addReg(SrcReg);
12525  } else {
12526  // Use stack to do the register copy.
12527  unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
12528  MachineRegisterInfo &RegInfo = F->getRegInfo();
12529  const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
12530  if (RC == &PPC::F8RCRegClass) {
12531  // Copy register from F8RCRegClass to G8RCRegClass.
12532  assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
12533  "Unsupported RegClass.");
12534 
12535  StoreOp = PPC::STFD;
12536  LoadOp = PPC::LD;
12537  } else {
12538  // Copy register from G8RCRegClass to F8RCRegClass.
12539  assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
12540  (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
12541  "Unsupported RegClass.");
12542  }
12543 
12544  MachineFrameInfo &MFI = F->getFrameInfo();
12545  int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
12546 
12547  MachineMemOperand *MMOStore = F->getMachineMemOperand(
12548  MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12549  MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
12550  MFI.getObjectAlign(FrameIdx));
12551 
12552  // Store the SrcReg into the stack.
12553  BuildMI(*BB, MI, dl, TII->get(StoreOp))
12554  .addReg(SrcReg)
12555  .addImm(0)
12556  .addFrameIndex(FrameIdx)
12557  .addMemOperand(MMOStore);
12558 
12559  MachineMemOperand *MMOLoad = F->getMachineMemOperand(
12560  MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12561  MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
12562  MFI.getObjectAlign(FrameIdx));
12563 
12564  // Load from the stack where SrcReg is stored, and save to DestReg,
12565  // so we have done the RegClass conversion from RegClass::SrcReg to
12566  // RegClass::DestReg.
12567  BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
12568  .addImm(0)
12569  .addFrameIndex(FrameIdx)
12570  .addMemOperand(MMOLoad);
12571  }
12572  };
12573 
12574  Register OldFPSCRReg = MI.getOperand(0).getReg();
12575 
12576  // Save FPSCR value.
12577  BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
12578 
12579  // When the operand is a gprc register, use the two least significant bits
12580  // of the register and the mtfsf instruction to set bits 62:63 of FPSCR.
12581  //
12582  // copy OldFPSCRTmpReg, OldFPSCRReg
12583  // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
12584  // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
12585  // copy NewFPSCRReg, NewFPSCRTmpReg
12586  // mtfsf 255, NewFPSCRReg
12587  MachineOperand SrcOp = MI.getOperand(1);
12588  MachineRegisterInfo &RegInfo = F->getRegInfo();
12589  Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12590 
12591  copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
12592 
12593  Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12594  Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12595 
12596  // The first operand of INSERT_SUBREG should be a register that has
12597  // subregisters; since we only care about its RegClass, we use an
12598  // IMPLICIT_DEF register.
12599  BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
12600  BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
12601  .addReg(ImDefReg)
12602  .add(SrcOp)
12603  .addImm(1);
12604 
12605  Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12606  BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
12607  .addReg(OldFPSCRTmpReg)
12608  .addReg(ExtSrcReg)
12609  .addImm(0)
12610  .addImm(62);
12611 
12612  Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
12613  copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
12614 
12615  // The mask 255 means to put bits 32:63 of NewFPSCRReg into bits 32:63
12616  // of FPSCR.
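  // (mtfsf treats the immediate as an 8-bit field mask; 255 selects all eight
  // 4-bit fields of the low word, i.e. FPSCR bits 32:63.)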
12617  BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
12618  .addImm(255)
12619  .addReg(NewFPSCRReg)
12620  .addImm(0)
12621  .addImm(0);
12622  } else if (MI.getOpcode() == PPC::SETFLM) {
12623  DebugLoc Dl = MI.getDebugLoc();
12624 
12625  // Result of setflm is previous FPSCR content, so we need to save it first.
12626  Register OldFPSCRReg = MI.getOperand(0).getReg();
12627  BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
12628 
12629  // Put bits in 32:63 to FPSCR.
12630  Register NewFPSCRReg = MI.getOperand(1).getReg();
12631  BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
12632  .addImm(255)
12633  .addReg(NewFPSCRReg)
12634  .addImm(0)
12635  .addImm(0);
12636  } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
12637  MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
12638  return emitProbedAlloca(MI, BB);
12639  } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
12640  DebugLoc DL = MI.getDebugLoc();
12641  Register Src = MI.getOperand(2).getReg();
12642  Register Lo = MI.getOperand(0).getReg();
12643  Register Hi = MI.getOperand(1).getReg();
12644  BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
12645  .addDef(Lo)
12646  .addUse(Src, 0, PPC::sub_gp8_x1);
12647  BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
12648  .addDef(Hi)
12649  .addUse(Src, 0, PPC::sub_gp8_x0);
12650  } else {
12651  llvm_unreachable("Unexpected instr type to insert");
12652  }
12653 
12654  MI.eraseFromParent(); // The pseudo instruction is gone now.
12655  return BB;
12656 }
12657 
12658 //===----------------------------------------------------------------------===//
12659 // Target Optimization Hooks
12660 //===----------------------------------------------------------------------===//
12661 
12662 static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
12663  // For the estimates, convergence is quadratic, so we essentially double the
12664  // number of digits correct after every iteration. For both FRE and FRSQRTE,
12665  // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
12666  // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
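  // For example, with hasRecipPrec() one refinement step takes the roughly
  // 2^-14 estimate to about 2^-28, which covers f32; f64 needs the extra step
  // added below.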
12667  int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
12668  if (VT.getScalarType() == MVT::f64)
12669  RefinementSteps++;
12670  return RefinementSteps;
12671 }
12672 
12673 SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
12674  const DenormalMode &Mode) const {
12675  // We only have VSX Vector Test for software Square Root.
12676  EVT VT = Op.getValueType();
12677  if (!isTypeLegal(MVT::i1) ||
12678  (VT != MVT::f64 &&
12679  ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
12680  return TargetLowering::getSqrtInputTest(Op, DAG, Mode);
12681 
12682  SDLoc DL(Op);
12683  // The output register of FTSQRT is a CR field.
12684  SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
12685  // ftsqrt BF,FRB
12686  // Let e_b be the unbiased exponent of the double-precision
12687  // floating-point operand in register FRB.
12688  // fe_flag is set to 1 if either of the following conditions occurs.
12689  // - The double-precision floating-point operand in register FRB is a zero,
12690  // a NaN, or an infinity, or a negative value.
12691  // - e_b is less than or equal to -970.
12692  // Otherwise fe_flag is set to 0.
12693  // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
12694  // not eligible for iteration. (zero/negative/infinity/nan or unbiased
12695  // exponent is less than -970)
12696  SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
12697  return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
12698  FTSQRT, SRIdxVal),
12699  0);
12700 }
12701 
12702 SDValue
12703 PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
12704  SelectionDAG &DAG) const {
12705  // We only have VSX Vector Square Root.
12706  EVT VT = Op.getValueType();
12707  if (VT != MVT::f64 &&
12708  ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
12709  return TargetLowering::getSqrtResultForDenormInput(Op, DAG);
12710 
12711  return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
12712 }
12713 
12714 SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
12715  int Enabled, int &RefinementSteps,
12716  bool &UseOneConstNR,
12717  bool Reciprocal) const {
12718  EVT VT = Operand.getValueType();
12719  if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
12720  (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
12721  (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
12722  (VT == MVT::v2f64 && Subtarget.hasVSX())) {
12723  if (RefinementSteps == ReciprocalEstimate::Unspecified)
12724  RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
12725 
12726  // The Newton-Raphson computation with a single constant does not provide
12727  // enough accuracy on some CPUs.
12728  UseOneConstNR = !Subtarget.needsTwoConstNR();
12729  return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
12730  }
12731  return SDValue();
12732 }
12733 
12734 SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
12735  int Enabled,
12736  int &RefinementSteps) const {
12737  EVT VT = Operand.getValueType();
12738  if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
12739  (VT == MVT::f64 && Subtarget.hasFRE()) ||
12740  (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
12741  (VT == MVT::v2f64 && Subtarget.hasVSX())) {
12742  if (RefinementSteps == ReciprocalEstimate::Unspecified)
12743  RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
12744  return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
12745  }
12746  return SDValue();
12747 }
12748 
12749 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
12750  // Note: This functionality is used only when unsafe-fp-math is enabled, and
12751  // on cores with reciprocal estimates (which are used when unsafe-fp-math is
12752  // enabled for division), this functionality is redundant with the default
12753  // combiner logic (once the division -> reciprocal/multiply transformation
12754  // has taken place). As a result, this matters more for older cores than for
12755  // newer ones.
12756 
12757  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
12758  // reciprocal if there are two or more FDIVs (for embedded cores with only
12759  // one FP pipeline) or three or more FDIVs (for generic OOO cores).
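  // For example, on cores where this returns 2, a block computing x/d and y/d
  // is rewritten as r = 1.0/d; x*r; y*r, while generic out-of-order cores only
  // profit once a third division shares the divisor.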
12760  switch (Subtarget.getCPUDirective()) {
12761  default:
12762  return 3;
12763  case PPC::DIR_440:
12764  case PPC::DIR_A2:
12765  case PPC::DIR_E500:
12766  case PPC::DIR_E500mc:
12767  case PPC::DIR_E5500:
12768  return 2;
12769  }
12770 }
12771 
12772 // isConsecutiveLSLoc needs to work even if all adds have not yet been
12773 // collapsed, and so we need to look through chains of them.
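// For example, (add (add X, 8), 4) yields Base = X and accumulates Offset 12.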
12774 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
12775  int64_t& Offset, SelectionDAG &DAG) {
12776  if (DAG.isBaseWithConstantOffset(Loc)) {
12777  Base = Loc.getOperand(0);
12778  Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
12779 
12780  // The base might itself be a base plus an offset, and if so, accumulate
12781  // that as well.
12782  getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
12783  }
12784 }
12785 
12786 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
12787  unsigned Bytes, int Dist,
12788  SelectionDAG &DAG) {
12789  if (VT.getSizeInBits() / 8 != Bytes)
12790  return false;
12791 
12792  SDValue BaseLoc = Base->getBasePtr();
12793  if (Loc.getOpcode() == ISD::FrameIndex) {
12794  if (BaseLoc.getOpcode() != ISD::FrameIndex)
12795  return false;
12796  const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
12797  int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
12798  int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
12799  int FS = MFI.getObjectSize(FI);
12800  int BFS = MFI.getObjectSize(BFI);
12801  if (FS != BFS || FS != (int)Bytes) return false;
12802  return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
12803  }
12804 
12805  SDValue Base1 = Loc, Base2 = BaseLoc;
12806  int64_t Offset1 = 0, Offset2 = 0;
12807  getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
12808  getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
12809  if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
12810  return true;
12811 
12812  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12813  const GlobalValue *GV1 = nullptr;
12814  const GlobalValue *GV2 = nullptr;
12815  Offset1 = 0;
12816  Offset2 = 0;
12817  bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
12818  bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
12819  if (isGA1 && isGA2 && GV1 == GV2)
12820  return Offset1 == (Offset2 + Dist*Bytes);
12821  return false;
12822 }
12823 
12824 // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
12825 // not enforce equality of the chain operands.
12827  unsigned Bytes, int Dist,
12828  SelectionDAG &DAG) {
12829  if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
12830  EVT VT = LS->getMemoryVT();
12831  SDValue Loc = LS->getBasePtr();
12832  return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
12833  }
12834 
12835  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
12836  EVT VT;
12837  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
12838  default: return false;
12839  case Intrinsic::ppc_altivec_lvx:
12840  case Intrinsic::ppc_altivec_lvxl:
12841  case Intrinsic::ppc_vsx_lxvw4x:
12842  case Intrinsic::ppc_vsx_lxvw4x_be:
12843  VT = MVT::v4i32;
12844  break;
12845  case Intrinsic::ppc_vsx_lxvd2x:
12846  case Intrinsic::ppc_vsx_lxvd2x_be:
12847  VT = MVT::v2f64;
12848  break;
12849  case Intrinsic::ppc_altivec_lvebx:
12850  VT = MVT::i8;
12851  break;
12852  case Intrinsic::ppc_altivec_lvehx:
12853  VT = MVT::i16;
12854  break;
12855  case Intrinsic::ppc_altivec_lvewx:
12856  VT = MVT::i32;
12857  break;
12858  }
12859 
12860  return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
12861  }
12862 
12863  if (N->getOpcode() == ISD::INTRINSIC_VOID) {
12864  EVT VT;
12865  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
12866  default: return false;
12867  case Intrinsic::ppc_altivec_stvx:
12868  case Intrinsic::ppc_altivec_stvxl:
12869  case Intrinsic::ppc_vsx_stxvw4x:
12870  VT = MVT::v4i32;
12871  break;
12872  case Intrinsic::ppc_vsx_stxvd2x:
12873  VT = MVT::v2f64;
12874  break;
12875  case Intrinsic::ppc_vsx_stxvw4x_be:
12876  VT = MVT::v4i32;
12877  break;
12878  case Intrinsic::ppc_vsx_stxvd2x_be:
12879  VT = MVT::v2f64;
12880  break;
12881  case Intrinsic::ppc_altivec_stvebx:
12882  VT = MVT::i8;
12883  break;
12884  case Intrinsic::ppc_altivec_stvehx:
12885  VT = MVT::i16;
12886  break;
12887  case Intrinsic::ppc_altivec_stvewx:
12888  VT = MVT::i32;
12889  break;
12890  }
12891 
12892  return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
12893  }
12894 
12895  return false;
12896 }
12897 
12898 // Return true if there is a nearby consecutive load to the one provided
12899 // (regardless of alignment). We search up and down the chain, looking through
12900 // token factors and other loads (but nothing else). As a result, a true result
12901 // indicates that it is safe to create a new consecutive load adjacent to the
12902 // load provided.
12903 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
12904  SDValue Chain = LD->getChain();
12905  EVT VT = LD->getMemoryVT();
12906 
12907  SmallSet<SDNode *, 16> LoadRoots;
12908  SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
12909  SmallSet<SDNode *, 16> Visited;
12910 
12911  // First, search up the chain, branching to follow all token-factor operands.
12912  // If we find a consecutive load, then we're done, otherwise, record all
12913  // nodes just above the top-level loads and token factors.
12914  while (!Queue.empty()) {
12915  SDNode *ChainNext = Queue.pop_back_val();
12916  if (!Visited.insert(ChainNext).second)
12917  continue;
12918 
12919  if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
12920  if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
12921  return true;
12922 
12923  if (!Visited.count(ChainLD->getChain().getNode()))
12924  Queue.push_back(ChainLD->getChain().getNode());
12925  } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
12926  for (const SDUse &O : ChainNext->ops())
12927  if (!Visited.count(O.getNode()))
12928  Queue.push_back(O.getNode());
12929  } else
12930  LoadRoots.insert(ChainNext);
12931  }
12932 
12933  // Second, search down the chain, starting from the top-level nodes recorded
12934  // in the first phase. These top-level nodes are the nodes just above all
12935  // loads and token factors. Starting with their uses, recursively look through
12936  // all loads (just the chain uses) and token factors to find a consecutive
12937  // load.
12938  Visited.clear();
12939  Queue.clear();
12940 
12941  for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
12942  IE = LoadRoots.end(); I != IE; ++I) {
12943  Queue.push_back(*I);
12944 
12945  while (!Queue.empty()) {
12946  SDNode *LoadRoot = Queue.pop_back_val();
12947  if (!Visited.insert(LoadRoot).second)
12948  continue;
12949 
12950  if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
12951  if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
12952  return true;
12953 
12954  for (SDNode::use_iterator UI = LoadRoot->use_begin(),
12955  UE = LoadRoot->use_end(); UI != UE; ++UI)
12956  if (((isa<MemSDNode>(*UI) &&
12957  cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
12958  UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
12959  Queue.push_back(*UI);
12960  }
12961  }
12962 
12963  return false;
12964 }
12965 
12966 /// This function is called when we have proved that a SETCC node can be replaced
12967 /// by subtraction (and other supporting instructions) so that the result of
12968 /// comparison is kept in a GPR instead of CR. This function is purely for
12969 /// codegen purposes and has some flags to guide the codegen process.
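/// For example, with 32-bit unsigned operands and i64 as the largest legal
/// integer type, 'x <u y' becomes ((zext x) - (zext y)) >> 63: the 64-bit
/// difference is negative exactly when x <u y, so the sign bit, shifted down
/// to bit 0, is the i1 result; the other unsigned predicates swap and/or
/// complement it.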
12970 static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
12971  bool Swap, SDLoc &DL, SelectionDAG &DAG) {
12972  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
12973 
12974  // Zero extend the operands to the largest legal integer. Originally, they
12975  // must be of a strictly smaller size.
12976  auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
12977  DAG.getConstant(Size, DL, MVT::i32));
12978  auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
12979  DAG.getConstant(Size, DL, MVT::i32));
12980 
12981  // Swap if needed. Depends on the condition code.
12982  if (Swap)
12983  std::swap(Op0, Op1);
12984 
12985  // Subtract extended integers.
12986  auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
12987 
12988  // Move the sign bit to the least significant position and zero out the rest.
12989  // Now the least significant bit carries the result of original comparison.
12990  auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
12991  DAG.getConstant(Size - 1, DL, MVT::i32));
12992  auto Final = Shifted;
12993 
12994  // Complement the result if needed. Based on the condition code.
12995  if (Complement)
12996  Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
12997  DAG.getConstant(1, DL, MVT::i64));
12998 
12999  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
13000 }
13001 
13002 SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
13003  DAGCombinerInfo &DCI) const {
13004  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
13005 
13006  SelectionDAG &DAG = DCI.DAG;
13007  SDLoc DL(N);
13008 
13009  // The size of the integers being compared has a critical role in the
13010  // following analysis, so we prefer to do this when all types are legal.
13011  if (!DCI.isAfterLegalizeDAG())
13012  return SDValue();
13013 
13014  // If all users of SETCC extend its value to a legal integer type,
13015  // then we replace SETCC with a subtraction.
13016  for (SDNode::use_iterator UI = N->use_begin(),
13017  UE = N->use_end(); UI != UE; ++UI) {
13018  if (UI->getOpcode() != ISD::ZERO_EXTEND)
13019  return SDValue();
13020  }
13021 
13022  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13023  auto OpSize = N->getOperand(0).getValueSizeInBits();
13024 
13025  unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
13026 
13027  if (OpSize < Size) {
13028  switch (CC) {
13029  default: break;
13030  case ISD::SETULT:
13031  return generateEquivalentSub(N, Size, false, false, DL, DAG);
13032  case ISD::SETULE:
13033  return generateEquivalentSub(N, Size, true, true, DL, DAG);
13034  case ISD::SETUGT:
13035  return generateEquivalentSub(N, Size, false, true, DL, DAG);
13036  case ISD::SETUGE:
13037  return generateEquivalentSub(N, Size, true, false, DL, DAG);
13038  }
13039  }
13040 
13041  return SDValue();
13042 }
13043 
13044 SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
13045  DAGCombinerInfo &DCI) const {
13046  SelectionDAG &DAG = DCI.DAG;
13047  SDLoc dl(N);
13048 
13049  assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
13050  // If we're tracking CR bits, we need to be careful that we don't have:
13051  // trunc(binary-ops(zext(x), zext(y)))
13052  // or
13053  // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
13054  // such that we're unnecessarily moving things into GPRs when it would be
13055  // better to keep them in CR bits.
13056 
13057  // Note that trunc here can be an actual i1 trunc, or can be the effective
13058  // truncation that comes from a setcc or select_cc.
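  // For example, trunc(xor(zext(a), zext(b))) with i1 'a' and 'b' can simply
  // become xor(a, b) evaluated entirely in CR bits, which is what the
  // promotion below arranges.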
13059  if (N->getOpcode() == ISD::TRUNCATE &&
13060  N->getValueType(0) != MVT::i1)
13061  return SDValue();
13062 
13063  if (N->getOperand(0).getValueType() != MVT::i32 &&
13064  N->getOperand(0).getValueType() != MVT::i64)
13065  return SDValue();
13066 
13067  if (N->getOpcode() == ISD::SETCC ||
13068  N->getOpcode() == ISD::SELECT_CC) {
13069  // If we're looking at a comparison, then we need to make sure that the
13070  // high bits (all except for the first) don't affect the result.
13071  ISD::CondCode CC =
13072  cast<CondCodeSDNode>(N->getOperand(
13073  N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
13074  unsigned OpBits = N->getOperand(0).getValueSizeInBits();
13075 
13076  if (ISD::isSignedIntSetCC(CC)) {
13077  if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
13078  DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
13079  return SDValue();
13080  } else if (ISD::isUnsignedIntSetCC(CC)) {
13081  if (!DAG.MaskedValueIsZero(N->getOperand(0),
13082  APInt::getHighBitsSet(OpBits, OpBits-1)) ||
13083  !DAG.MaskedValueIsZero(N->getOperand(1),
13084  APInt::getHighBitsSet(OpBits, OpBits-1)))
13085  return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
13086  : SDValue());
13087  } else {
13088  // This is neither a signed nor an unsigned comparison, just make sure
13089  // that the high bits are equal.
13090  KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
13091  KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
13092 
13093  // We don't really care about what is known about the first bit (if
13094  // anything), so pretend that it is known zero for both to ensure they can
13095  // be compared as constants.
13096  Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
13097  Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
13098 
13099  if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
13100  Op1Known.getConstant() != Op2Known.getConstant())
13101  return SDValue();
13102  }
13103  }
13104 
13105  // We now know that the higher-order bits are irrelevant, we just need to
13106  // make sure that all of the intermediate operations are bit operations, and
13107  // all inputs are extensions.
13108  if (N->getOperand(0).getOpcode() != ISD::AND &&
13109  N->getOperand(0).getOpcode() != ISD::OR &&
13110  N->getOperand(0).getOpcode() != ISD::XOR &&
13111  N->getOperand(0).getOpcode() != ISD::SELECT &&
13112  N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
13113  N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
13114  N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
13115  N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
13116  N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
13117  return SDValue();
13118 
13119  if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
13120  N->getOperand(1).getOpcode() != ISD::AND &&
13121  N->getOperand(1).getOpcode() != ISD::OR &&
13122  N->getOperand(1).getOpcode() != ISD::XOR &&
13123  N->getOperand(1).getOpcode() != ISD::SELECT &&
13124  N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
13125  N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
13126  N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
13127  N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
13128  N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
13129  return SDValue();
13130 
13131  SmallVector<SDValue, 4> Inputs;
13132  SmallVector<SDValue, 8> BinOps, PromOps;
13133  SmallPtrSet<SDNode *, 16> Visited;
13134 
13135  for (unsigned i = 0; i < 2; ++i) {
13136  if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
13137  N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
13138  N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
13139  N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
13140  isa<ConstantSDNode>(N->getOperand(i)))
13141  Inputs.push_back(N->getOperand(i));
13142  else
13143  BinOps.push_back(N->getOperand(i));
13144 
13145  if (N->getOpcode() == ISD::TRUNCATE)
13146  break;
13147  }
13148 
13149  // Visit all inputs, collect all binary operations (and, or, xor and
13150  // select) that are all fed by extensions.
13151  while (!BinOps.empty()) {
13152  SDValue BinOp = BinOps.pop_back_val();
13153 
13154  if (!Visited.insert(BinOp.getNode()).second)
13155  continue;
13156 
13157  PromOps.push_back(BinOp);
13158 
13159  for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
13160  // The condition of the select is not promoted.
13161  if (BinOp.getOpcode() == ISD::SELECT && i == 0)
13162  continue;
13163  if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
13164  continue;
13165 
13166  if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
13167  BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
13168  BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
13169  BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
13170  isa<ConstantSDNode>(BinOp.getOperand(i))) {
13171  Inputs.push_back(BinOp.getOperand(i));
13172  } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
13173  BinOp.getOperand(i).getOpcode() == ISD::OR ||
13174  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
13175  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
13176  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
13177  BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
13178  BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
13179  BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
13180  BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
13181  BinOps.push_back(BinOp.getOperand(i));
13182  } else {
13183  // We have an input that is not an extension or another binary
13184  // operation; we'll abort this transformation.
13185  return SDValue();
13186  }
13187  }
13188  }
13189 
13190  // Make sure that this is a self-contained cluster of operations (which
13191  // is not quite the same thing as saying that everything has only one
13192  // use).
13193  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13194  if (isa<ConstantSDNode>(Inputs[i]))
13195  continue;
13196 
13197  for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
13198  UE = Inputs[i].getNode()->use_end();
13199  UI != UE; ++UI) {
13200  SDNode *User = *UI;
13201  if (User != N && !Visited.count(User))
13202  return SDValue();
13203 
13204  // Make sure that we're not going to promote the non-output-value
13205  // operand(s) or SELECT or SELECT_CC.
13206  // FIXME: Although we could sometimes handle this, and it does occur in
13207  // practice that one of the condition inputs to the select is also one of
13208  // the outputs, we currently can't deal with this.
13209  if (User->getOpcode() == ISD::SELECT) {
13210  if (User->getOperand(0) == Inputs[i])
13211  return SDValue();
13212  } else if (User->getOpcode() == ISD::SELECT_CC) {
13213  if (User->getOperand(0) == Inputs[i] ||
13214  User->getOperand(1) == Inputs[i])
13215  return SDValue();
13216  }
13217  }
13218  }
13219 
13220  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
13221  for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
13222  UE = PromOps[i].getNode()->use_end();
13223  UI != UE; ++UI) {
13224  SDNode *User = *UI;
13225  if (User != N && !Visited.count(User))
13226  return SDValue();
13227 
13228  // Make sure that we're not going to promote the non-output-value
13229  // operand(s) or SELECT or SELECT_CC.
13230  // FIXME: Although we could sometimes handle this, and it does occur in
13231  // practice that one of the condition inputs to the select is also one of
13232  // the outputs, we currently can't deal with this.
13233  if (User->getOpcode() == ISD::SELECT) {
13234  if (User->getOperand(0) == PromOps[i])
13235  return SDValue();
13236  } else if (User->getOpcode() == ISD::SELECT_CC) {
13237  if (User->getOperand(0) == PromOps[i] ||
13238  User->getOperand(1) == PromOps[i])
13239  return SDValue();
13240  }
13241  }
13242  }
13243 
13244  // Replace all inputs with the extension operand.
13245  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13246  // Constants may have users outside the cluster of to-be-promoted nodes,
13247  // and so we need to replace those as we do the promotions.
13248  if (isa<ConstantSDNode>(Inputs[i]))
13249  continue;
13250  else
13251  DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
13252  }
13253 
13254  std::list<HandleSDNode> PromOpHandles;
13255  for (auto &PromOp : PromOps)
13256  PromOpHandles.emplace_back(PromOp);
13257 
13258  // Replace all operations (these are all the same, but have a different
13259  // (i1) return type). DAG.getNode will validate that the types of
13260  // a binary operator match, so go through the list in reverse so that
13261  // we've likely promoted both operands first. Any intermediate truncations or
13262  // extensions disappear.
13263  while (!PromOpHandles.empty()) {
13264  SDValue PromOp = PromOpHandles.back().getValue();
13265  PromOpHandles.pop_back();
13266 
13267  if (PromOp.getOpcode() == ISD::TRUNCATE ||
13268  PromOp.getOpcode() == ISD::SIGN_EXTEND ||
13269  PromOp.getOpcode() == ISD::ZERO_EXTEND ||
13270  PromOp.getOpcode() == ISD::ANY_EXTEND) {
13271  if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
13272  PromOp.getOperand(0).getValueType() != MVT::i1) {
13273  // The operand is not yet ready (see comment below).
13274  PromOpHandles.emplace_front(PromOp);
13275  continue;
13276  }
13277 
13278  SDValue RepValue = PromOp.getOperand(0);
13279  if (isa<ConstantSDNode>(RepValue))
13280  RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
13281 
13282  DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
13283  continue;
13284  }
13285 
13286  unsigned C;
13287  switch (PromOp.getOpcode()) {
13288  default: C = 0; break;
13289  case ISD::SELECT: C = 1; break;
13290  case ISD::SELECT_CC: C = 2; break;
13291  }
13292 
13293  if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
13294  PromOp.getOperand(C).getValueType() != MVT::i1) ||
13295  (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
13296  PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
13297  // The to-be-promoted operands of this node have not yet been
13298  // promoted (this should be rare because we're going through the
13299  // list backward, but if one of the operands has several users in
13300  // this cluster of to-be-promoted nodes, it is possible).
13301  PromOpHandles.emplace_front(PromOp);
13302  continue;
13303  }
13304 
13305  SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
13306  PromOp.getNode()->op_end());
13307 
13308  // If there are any constant inputs, make sure they're replaced now.
13309  for (unsigned i = 0; i < 2; ++i)
13310  if (isa<ConstantSDNode>(Ops[C+i]))
13311  Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
13312 
13313  DAG.ReplaceAllUsesOfValueWith(PromOp,
13314  DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
13315  }
13316 
13317  // Now we're left with the initial truncation itself.
13318  if (N->getOpcode() == ISD::TRUNCATE)
13319  return N->getOperand(0);
13320 
13321  // Otherwise, this is a comparison. The operands to be compared have just
13322  // changed type (to i1), but everything else is the same.
13323  return SDValue(N, 0);
13324 }
13325 
13326 SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
13327  DAGCombinerInfo &DCI) const {
13328  SelectionDAG &DAG = DCI.DAG;
13329  SDLoc dl(N);
13330 
13331  // If we're tracking CR bits, we need to be careful that we don't have:
13332  // zext(binary-ops(trunc(x), trunc(y)))
13333  // or
13334  // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
13335  // such that we're unnecessarily moving things into CR bits that can more
13336  // efficiently stay in GPRs. Note that if we're not certain that the high
13337  // bits are set as required by the final extension, we still may need to do
13338  // some masking to get the proper behavior.
13339 
13340  // This same functionality is important on PPC64 when dealing with
13341  // 32-to-64-bit extensions; these occur often when 32-bit values are used as
13342  // the return values of functions. Because it is so similar, it is handled
13343  // here as well.
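  // For example, zext(and(trunc(x), trunc(y))) with i64 'x' and 'y' can be
  // rebuilt as an i64 'and' of x and y, with an explicit mask only when the
  // high bits are not already known to be zero.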
13344 
13345  if (N->getValueType(0) != MVT::i32 &&
13346  N->getValueType(0) != MVT::i64)
13347  return SDValue();
13348 
13349  if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
13350  (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
13351  return SDValue();
13352 
13353  if (N->getOperand(0).getOpcode() != ISD::AND &&
13354  N->getOperand(0).getOpcode() != ISD::OR &&
13355  N->getOperand(0).getOpcode() != ISD::XOR &&
13356  N->getOperand(0).getOpcode() != ISD::SELECT &&
13357  N->getOperand(0).getOpcode() != ISD::SELECT_CC)
13358  return SDValue();
13359 
13360  SmallVector<SDValue, 4> Inputs;
13361  SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
13362  SmallPtrSet<SDNode *, 16> Visited;
13363 
13364  // Visit all inputs, collect all binary operations (and, or, xor and
13365  // select) that are all fed by truncations.
13366  while (!BinOps.empty()) {
13367  SDValue BinOp = BinOps.pop_back_val();
13368 
13369  if (!Visited.insert(BinOp.getNode()).second)
13370  continue;
13371 
13372  PromOps.push_back(BinOp);
13373 
13374  for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
13375  // The condition of the select is not promoted.
13376  if (BinOp.getOpcode() == ISD::SELECT && i == 0)
13377  continue;
13378  if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
13379  continue;
13380 
13381  if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
13382  isa<ConstantSDNode>(BinOp.getOperand(i))) {
13383  Inputs.push_back(BinOp.getOperand(i));
13384  } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
13385  BinOp.getOperand(i).getOpcode() == ISD::OR ||
13386  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
13387  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
13388  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
13389  BinOps.push_back(BinOp.getOperand(i));
13390  } else {
13391  // We have an input that is not a truncation or another binary
13392  // operation; we'll abort this transformation.
13393  return SDValue();
13394  }
13395  }
13396  }
13397 
13398  // The operands of a select that must be truncated when the select is
13399  // promoted because the operand is actually part of the to-be-promoted set.
13400  DenseMap<SDNode *, EVT> SelectTruncOp[2];
13401 
13402  // Make sure that this is a self-contained cluster of operations (which
13403  // is not quite the same thing as saying that everything has only one
13404  // use).
13405  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13406  if (isa<ConstantSDNode>(Inputs[i]))
13407  continue;
13408 
13409  for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
13410  UE = Inputs[i].getNode()->use_end();
13411  UI != UE; ++UI) {
13412  SDNode *User = *UI;
13413  if (User != N && !Visited.count(User))
13414  return SDValue();
13415 
13416  // If we're going to promote the non-output-value operand(s) or SELECT or
13417  // SELECT_CC, record them for truncation.
13418  if (User->getOpcode() == ISD::SELECT) {
13419  if (User->getOperand(0) == Inputs[i])
13420  SelectTruncOp[0].insert(std::make_pair(User,
13421  User->getOperand(0).getValueType()));
13422  } else if (User->getOpcode() == ISD::SELECT_CC) {
13423  if (User->getOperand(0) == Inputs[i])
13424  SelectTruncOp[0].insert(std::make_pair(User,
13425  User->getOperand(0).getValueType()));
13426  if (User->getOperand(1) == Inputs[i])
13427  SelectTruncOp[1].insert(std::make_pair(User,
13428  User->getOperand(1).getValueType()));
13429  }
13430  }
13431  }
13432 
13433  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
13434  for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
13435  UE = PromOps[i].getNode()->use_end();
13436  UI != UE; ++UI) {
13437  SDNode *User = *UI;
13438  if (User != N && !Visited.count(User))
13439  return SDValue();
13440 
13441  // If we're going to promote the non-output-value operand(s) or SELECT or
13442  // SELECT_CC, record them for truncation.
13443  if (User->getOpcode() == ISD::SELECT) {
13444  if (User->getOperand(0) == PromOps[i])
13445  SelectTruncOp[0].insert(std::make_pair(User,
13446  User->getOperand(0).getValueType()));
13447  } else if (User->getOpcode() == ISD::SELECT_CC) {
13448  if (User->getOperand(0) == PromOps[i])
13449  SelectTruncOp[0].insert(std::make_pair(User,
13450  User->getOperand(0).getValueType()));
13451  if (User->getOperand(1) == PromOps[i])
13452  SelectTruncOp[1].insert(std::make_pair(User,
13453  User->getOperand(1).getValueType()));
13454  }
13455  }
13456  }
13457 
13458  unsigned PromBits = N->getOperand(0).getValueSizeInBits();
13459  bool ReallyNeedsExt = false;
13460  if (N->getOpcode() != ISD::ANY_EXTEND) {
13461  // If all of the inputs are not already sign/zero extended, then
13462  // we'll still need to do that at the end.
13463  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13464  if (isa<ConstantSDNode>(Inputs[i]))
13465  continue;
13466 
13467  unsigned OpBits =
13468  Inputs[i].getOperand(0).getValueSizeInBits();
13469  assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
13470 
13471  if ((N->getOpcode() == ISD::ZERO_EXTEND &&
13472  !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
13473  APInt::getHighBitsSet(OpBits,
13474  OpBits-PromBits))) ||
13475  (N->getOpcode() == ISD::SIGN_EXTEND &&
13476  DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
13477  (OpBits-(PromBits-1)))) {
13478  ReallyNeedsExt = true;
13479  break;
13480  }
13481  }
13482  }
13483 
13484  // Replace all inputs, either with the truncation operand, or a
13485  // truncation or extension to the final output type.
13486  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13487  // Constant inputs need to be replaced with the to-be-promoted nodes that
13488  // use them because they might have users outside of the cluster of
13489  // promoted nodes.
13490  if (isa<ConstantSDNode>(Inputs[i]))
13491  continue;
13492 
13493  SDValue InSrc = Inputs[i].getOperand(0);
13494  if (Inputs[i].getValueType() == N->getValueType(0))
13495  DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
13496  else if (N->getOpcode() == ISD::SIGN_EXTEND)
13497  DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13498  DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
13499  else if (N->getOpcode() == ISD::ZERO_EXTEND)
13500  DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13501  DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
13502  else
13503  DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13504  DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
13505  }
13506 
13507  std::list<HandleSDNode> PromOpHandles;
13508  for (auto &PromOp : PromOps)
13509  PromOpHandles.emplace_back(PromOp);
13510 
13511  // Replace all operations (these are all the same, but have a different
13512  // (promoted) return type). DAG.getNode will validate that the types of
13513  // a binary operator match, so go through the list in reverse so that
13514  // we've likely promoted both operands first.
13515  while (!PromOpHandles.empty()) {
13516  SDValue PromOp = PromOpHandles.back().getValue();
13517  PromOpHandles.pop_back();
13518 
13519  unsigned C;
13520  switch (PromOp.getOpcode()) {
13521  default: C = 0; break;
13522  case ISD::SELECT: C = 1; break;
13523  case ISD::SELECT_CC: C = 2; break;
13524  }
13525 
13526  if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
13527  PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
13528  (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
13529  PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
13530  // The to-be-promoted operands of this node have not yet been
13531  // promoted (this should be rare because we're going through the
13532  // list backward, but if one of the operands has several users in
13533  // this cluster of to-be-promoted nodes, it is possible).
13534  PromOpHandles.emplace_front(PromOp);
13535  continue;
13536  }
13537 
13538  // For SELECT and SELECT_CC nodes, we do a similar check for any
13539  // to-be-promoted comparison inputs.
13540  if (PromOp.getOpcode() == ISD::SELECT ||
13541  PromOp.getOpcode() == ISD::SELECT_CC) {
13542  if ((SelectTruncOp[0].count(PromOp.getNode()) &&
13543  PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
13544  (SelectTruncOp[1].count(PromOp.getNode()) &&
13545  PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
13546  PromOpHandles.emplace_front(PromOp);
13547  continue;
13548  }
13549  }
13550 
13551  SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
13552  PromOp.getNode()->op_end());
13553 
13554  // If this node has constant inputs, then they'll need to be promoted here.
13555  for (unsigned i = 0; i < 2; ++i) {
13556  if (!isa<ConstantSDNode>(Ops[C+i]))
13557  continue;
13558  if (Ops[C+i].getValueType() == N->getValueType(0))
13559  continue;
13560 
13561  if (N->getOpcode() == ISD::SIGN_EXTEND)
13562  Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13563  else if (N->getOpcode() == ISD::ZERO_EXTEND)
13564  Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13565  else
13566  Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13567  }
13568 
13569  // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
13570  // truncate them again to the original value type.
13571  if (PromOp.getOpcode() == ISD::SELECT ||
13572  PromOp.getOpcode() == ISD::SELECT_CC) {
13573  auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
13574  if (SI0 != SelectTruncOp[0].end())
13575  Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
13576  auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
13577  if (SI1 != SelectTruncOp[1].end())
13578  Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
13579  }
13580 
13581  DAG.ReplaceAllUsesOfValueWith(PromOp,
13582  DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
13583  }
13584 
13585  // Now we're left with the initial extension itself.
13586  if (!ReallyNeedsExt)
13587  return N->getOperand(0);
13588 
13589  // To zero extend, just mask off everything except for the first bit (in the
13590  // i1 case).
13591  if (N->getOpcode() == ISD::ZERO_EXTEND)
13592  return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
13593  DAG.getConstant(APInt::getLowBitsSet(
13594  N->getValueSizeInBits(0), PromBits),
13595  dl, N->getValueType(0)));
13596 
13597  assert(N->getOpcode() == ISD::SIGN_EXTEND &&
13598  "Invalid extension type");
13599  EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
13600  SDValue ShiftCst =
13601  DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
13602  return DAG.getNode(
13603  ISD::SRA, dl, N->getValueType(0),
13604  DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
13605  ShiftCst);
13606 }
13607 
13608 SDValue PPCTargetLowering::combineSetCC(SDNode *N,
13609  DAGCombinerInfo &DCI) const {
13610  assert(N->getOpcode() == ISD::SETCC &&
13611  "Should be called with a SETCC node");
13612 
13613  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13614  if (CC == ISD::SETNE || CC == ISD::SETEQ) {
13615  SDValue LHS = N->getOperand(0);
13616  SDValue RHS = N->getOperand(1);
13617 
13618  // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
13619  if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
13620  LHS.hasOneUse())
13621  std::swap(LHS, RHS);
13622 
13623  // x == 0-y --> x+y == 0
13624  // x != 0-y --> x+y != 0
13625  if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
13626  RHS.hasOneUse()) {
13627  SDLoc DL(N);
13628  SelectionDAG &DAG = DCI.DAG;
13629  EVT VT = N->getValueType(0);
13630  EVT OpVT = LHS.getValueType();
13631  SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
13632  return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
13633  }
13634  }
13635 
13636  return DAGCombineTruncBoolExt(N, DCI);
13637 }
13638 
13639 // Is this an extending load from an f32 to an f64?
13640 static bool isFPExtLoad(SDValue Op) {
13641  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
13642  return LD->getExtensionType() == ISD::EXTLOAD &&
13643  Op.getValueType() == MVT::f64;
13644  return false;
13645 }
13646 
13647 /// Reduces the number of fp-to-int conversions when building a vector.
13648 ///
13649 /// If this vector is built out of floating to integer conversions,
13650 /// transform it to a vector built out of floating point values followed by a
13651 /// single floating to integer conversion of the vector.
13652 /// Namely (build_vector (fptosi $A), (fptosi $B), ...)
13653 /// becomes (fptosi (build_vector ($A, $B, ...)))
13654 SDValue PPCTargetLowering::
13655 combineElementTruncationToVectorTruncation(SDNode *N,
13656  DAGCombinerInfo &DCI) const {
13657  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
13658  "Should be called with a BUILD_VECTOR node");
13659 
13660  SelectionDAG &DAG = DCI.DAG;
13661  SDLoc dl(N);
13662 
13663  SDValue FirstInput = N->getOperand(0);
13664  assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
13665  "The input operand must be an fp-to-int conversion.");
13666 
13667  // This combine happens after legalization so the fp_to_[su]i nodes are
13668  // already converted to PPCISD nodes.
13669  unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
13670  if (FirstConversion == PPCISD::FCTIDZ ||
13671  FirstConversion == PPCISD::FCTIDUZ ||
13672  FirstConversion == PPCISD::FCTIWZ ||
13673  FirstConversion == PPCISD::FCTIWUZ) {
13674  bool IsSplat = true;
13675  bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
13676  FirstConversion == PPCISD::FCTIWUZ;
13677  EVT SrcVT = FirstInput.getOperand(0).getValueType();
13678  SmallVector<SDValue, 4> Ops;
13679  EVT TargetVT = N->getValueType(0);
13680  for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
13681  SDValue NextOp = N->getOperand(i);
13682  if (NextOp.getOpcode() != PPCISD::MFVSR)
13683  return SDValue();
13684  unsigned NextConversion = NextOp.getOperand(0).getOpcode();
13685  if (NextConversion != FirstConversion)
13686  return SDValue();
13687  // If we are converting to 32-bit integers, we need to add an FP_ROUND.
13688  // This is not valid if the input was originally double precision. It is
13689  // also not profitable to do unless this is an extending load in which
13690  // case doing this combine will allow us to combine consecutive loads.
13691  if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
13692  return SDValue();
13693  if (N->getOperand(i) != FirstInput)
13694  IsSplat = false;
13695  }
13696 
13697  // If this is a splat, we leave it as-is since there will be only a single
13698  // fp-to-int conversion followed by a splat of the integer. This is better
13699  // for 32-bit and smaller ints and neutral for 64-bit ints.
13700  if (IsSplat)
13701  return SDValue();
13702 
13703  // Now that we know we have the right type of node, get its operands
13704  for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
13705  SDValue In = N->getOperand(i).getOperand(0);
13706  if (Is32Bit) {
13707  // For 32-bit values, we need to add an FP_ROUND node (if we made it
13708  // here, we know that all inputs are extending loads so this is safe).
13709  if (In.isUndef())
13710  Ops.push_back(DAG.getUNDEF(SrcVT));
13711  else {
13712  SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,
13713  MVT::f32, In.getOperand(0),
13714  DAG.getIntPtrConstant(1, dl));
13715  Ops.push_back(Trunc);
13716  }
13717  } else
13718  Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
13719  }
13720 
13721  unsigned Opcode;
13722  if (FirstConversion == PPCISD::FCTIDZ ||
13723  FirstConversion == PPCISD::FCTIWZ)
13724  Opcode = ISD::FP_TO_SINT;
13725  else
13726  Opcode = ISD::FP_TO_UINT;
13727 
13728  EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
13729  SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
13730  return DAG.getNode(Opcode, dl, TargetVT, BV);
13731  }
13732  return SDValue();
13733 }
13734 
13735 /// Reduce the number of loads when building a vector.
13736 ///
13737 /// Building a vector out of multiple loads can be converted to a load
13738 /// of the vector type if the loads are consecutive. If the loads are
13739 /// consecutive but in descending order, a shuffle is added at the end
13740 /// to reorder the vector.
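/// For example, (build_vector (load p), (load p+4), (load p+8), (load p+12))
/// with i32 elements becomes a single v4i32 load of p; if the loads instead
/// run in descending address order, the wide load is followed by a reversing
/// shuffle.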
13741 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
13742  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
13743  "Should be called with a BUILD_VECTOR node");
13744 
13745  SDLoc dl(N);
13746 
13747  // Return early for non byte-sized types, as they can't be consecutive.
13748  if (!N->getValueType(0).getVectorElementType().isByteSized())
13749  return SDValue();
13750 
13751  bool InputsAreConsecutiveLoads = true;
13752  bool InputsAreReverseConsecutive = true;
13753  unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
13754  SDValue FirstInput = N->getOperand(0);
13755  bool IsRoundOfExtLoad = false;
13756 
13757  if (FirstInput.getOpcode() == ISD::FP_ROUND &&
13758  FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
13759  LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
13760  IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
13761  }
13762  // Not a build vector of (possibly fp_rounded) loads.
13763  if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
13764  N->getNumOperands() == 1)
13765  return SDValue();
13766 
13767  for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
13768  // If any inputs are fp_round(extload), they all must be.
13769  if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
13770  return SDValue();
13771 
13772  SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
13773  N->getOperand(i);
13774  if (NextInput.getOpcode() != ISD::LOAD)
13775  return SDValue();
13776 
13777  SDValue PreviousInput =
13778  IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
13779  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
13780  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);
13781 
13782  // If any inputs are fp_round(extload), they all must be.
13783  if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
13784  return SDValue();
13785 
13786  if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
13787  InputsAreConsecutiveLoads = false;
13788  if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
13789  InputsAreReverseConsecutive = false;
13790 
13791  // Exit early if the loads are neither consecutive nor reverse consecutive.
13792  if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
13793  return SDValue();
13794  }
13795 
13796  assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
13797  "The loads cannot be both consecutive and reverse consecutive.");
13798 
13799  SDValue FirstLoadOp =
13800  IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
13801  SDValue LastLoadOp =
13802  IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
13803  N->getOperand(N->getNumOperands()-1);
13804 
13805  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
13806  LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
13807  if (InputsAreConsecutiveLoads) {
13808  assert(LD1 && "Input needs to be a LoadSDNode.");
13809  return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
13810  LD1->getBasePtr(), LD1->getPointerInfo(),
13811  LD1->getAlignment());
13812  }
13813  if (InputsAreReverseConsecutive) {
13814  assert(LDL && "Input needs to be a LoadSDNode.");
13815  SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(),
13816  LDL->getBasePtr(), LDL->getPointerInfo(),
13817  LDL->getAlignment());
13818  SmallVector<int, 16> Ops;
13819  for (int i = N->getNumOperands() - 1; i >= 0; i--)
13820  Ops.push_back(i);
13821 
13822  return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
13823  DAG.getUNDEF(N->getValueType(0)), Ops);
13824  }
13825  return SDValue();
13826 }
13827 
13828 // This function adds the required vector_shuffle needed to get
13829 // the elements of the vector extract in the correct position
13830 // as specified by the CorrectElems encoding.
13831 static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
13832  SDValue Input, uint64_t Elems,
13833  uint64_t CorrectElems) {
13834  SDLoc dl(N);
13835 
13836  unsigned NumElems = Input.getValueType().getVectorNumElements();
13837  SmallVector<int, 16> ShuffleMask(NumElems, -1);
13838 
13839  // Knowing the element indices being extracted from the original
13840  // vector and the order in which they're being inserted, just put
13841  // them at element indices required for the instruction.
13842  for (unsigned i = 0; i < N->getNumOperands(); i++) {
13843  if (DAG.getDataLayout().isLittleEndian())
13844  ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
13845  else
13846  ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
13847  CorrectElems = CorrectElems >> 8;
13848  Elems = Elems >> 8;
13849  }
13850 
13851  SDValue Shuffle =
13852  DAG.getVectorShuffle(Input.getValueType(), dl, Input,
13853  DAG.getUNDEF(Input.getValueType()), ShuffleMask);
13854 
13855  EVT VT = N->getValueType(0);
13856  SDValue Conv = DAG.getBitcast(VT, Shuffle);
13857 
13858  EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
13859  Input.getValueType().getVectorElementType(),
13860  VT.getVectorNumElements());
13861  return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
13862  DAG.getValueType(ExtVT));
13863 }
13864 
13865 // Look for build vector patterns where input operands come from sign
13866 // extended vector_extract elements of specific indices. If the correct indices
13867 // aren't used, add a vector shuffle to fix up the indices and create
13868 // SIGN_EXTEND_INREG node which selects the vector sign extend instructions
13869 // during instruction selection.
13870 static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
13871  // This array encodes the indices that the vector sign extend instructions
13872  // extract from when extending from one type to another for both BE and LE.
13873  // The right nibble of each byte corresponds to the LE indices,
13874  // and the left nibble of each byte corresponds to the BE indices.
13875  // For example: 0x3074B8FC byte->word
13876  // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
13877  // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
13878  // For example: 0x000070F8 byte->double word
13879  // For LE: the allowed indices are: 0x0,0x8
13880  // For BE: the allowed indices are: 0x7,0xF
13881  uint64_t TargetElems[] = {
13882  0x3074B8FC, // b->w
13883  0x000070F8, // b->d
13884  0x10325476, // h->w
13885  0x00003074, // h->d
13886  0x00001032, // w->d
13887  };
13888 
13889  uint64_t Elems = 0;
13890  int Index;
13891  SDValue Input;
13892 
13893  auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
13894  if (!Op)
13895  return false;
13896  if (Op.getOpcode() != ISD::SIGN_EXTEND &&
13897  Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
13898  return false;
13899 
13900  // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
13901  // of the right width.
13902  SDValue Extract = Op.getOperand(0);
13903  if (Extract.getOpcode() == ISD::ANY_EXTEND)
13904  Extract = Extract.getOperand(0);
13905  if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
13906  return false;
13907 
13908  ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
13909  if (!ExtOp)
13910  return false;
13911 
13912  Index = ExtOp->getZExtValue();
13913  if (Input && Input != Extract.getOperand(0))
13914  return false;
13915 
13916  if (!Input)
13917  Input = Extract.getOperand(0);
13918 
13919  Elems = Elems << 8;
13920  Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
13921  Elems |= Index;
13922 
13923  return true;
13924  };
13925 
13926  // If the build vector operands aren't sign-extended vector extracts
13927  // of the same input vector, then return.
13928  for (unsigned i = 0; i < N->getNumOperands(); i++) {
13929  if (!isSExtOfVecExtract(N->getOperand(i))) {
13930  return SDValue();
13931  }
13932  }
13933 
13934  // If the vector extract indices are not correct, add the appropriate
13935  // vector_shuffle.
13936  int TgtElemArrayIdx;
13937  int InputSize = Input.getValueType().getScalarSizeInBits();
13938  int OutputSize = N->getValueType(0).getScalarSizeInBits();
13939  if (InputSize + OutputSize == 40)
13940  TgtElemArrayIdx = 0;
13941  else if (InputSize + OutputSize == 72)
13942  TgtElemArrayIdx = 1;
13943  else if (InputSize + OutputSize == 48)
13944  TgtElemArrayIdx = 2;
13945  else if (InputSize + OutputSize == 80)
13946  TgtElemArrayIdx = 3;
13947  else if (InputSize + OutputSize == 96)
13948  TgtElemArrayIdx = 4;
13949  else
13950  return SDValue();
13951 
13952  uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
13953  CorrectElems = DAG.getDataLayout().isLittleEndian()
13954  ? CorrectElems & 0x0F0F0F0F0F0F0F0F
13955  : CorrectElems & 0xF0F0F0F0F0F0F0F0;
13956  if (Elems != CorrectElems) {
13957  return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
13958  }
13959 
13960  // Regular lowering will catch cases where a shuffle is not needed.
13961  return SDValue();
13962 }
13963 
13964 // Look for the pattern of a load from a narrow width to i128, feeding
13965 // into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
13966 // (LXVRZX). This node represents a zero extending load that will be matched
13967 // to the Load VSX Vector Rightmost instructions.
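// For example (illustrative):
//   t1: i128,ch = load<(zext from i32)> t0, t2, undef
//   t3: v1i128 = BUILD_VECTOR t1
// becomes a single PPCISD::LXVRZX memory intrinsic node loading 32 bits.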
13968 static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
13969  SDLoc DL(N);
13970 
13971  // This combine is only eligible for a BUILD_VECTOR of v1i128.
13972  if (N->getValueType(0) != MVT::v1i128)
13973  return SDValue();
13974 
13975  SDValue Operand = N->getOperand(0);
13976  // Proceed with the transformation if the operand to the BUILD_VECTOR
13977  // is a load instruction.
13978  if (Operand.getOpcode() != ISD::LOAD)
13979  return SDValue();
13980 
13981  auto *LD = cast<LoadSDNode>(Operand);
13982  EVT MemoryType = LD->getMemoryVT();
13983 
13984  // This transformation is only valid if we are loading either a byte,
13985  // halfword, word, or doubleword.
13986  bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
13987  MemoryType == MVT::i32 || MemoryType == MVT::i64;
13988 
13989  // Ensure that the load from the narrow width is being zero extended to i128.
13990  if (!ValidLDType ||
13991  (LD->getExtensionType() != ISD::ZEXTLOAD &&
13992  LD->getExtensionType() != ISD::EXTLOAD))
13993  return SDValue();
13994 
13995  SDValue LoadOps[] = {
13996  LD->getChain(), LD->getBasePtr(),
13997  DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
13998 
13999  return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
14000  DAG.getVTList(MVT::v1i128, MVT::Other),
14001  LoadOps, MemoryType, LD->getMemOperand());
14002 }
14003 
14004 SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
14005  DAGCombinerInfo &DCI) const {
14006  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14007  "Should be called with a BUILD_VECTOR node");
14008 
14009  SelectionDAG &DAG = DCI.DAG;
14010  SDLoc dl(N);
14011 
14012  if (!Subtarget.hasVSX())
14013  return SDValue();
14014 
14015  // The target independent DAG combiner will leave a build_vector of
14016  // float-to-int conversions intact. We can generate MUCH better code for
14017  // a float-to-int conversion of a vector of floats.
14018  SDValue FirstInput = N->getOperand(0);
14019  if (FirstInput.getOpcode() == PPCISD::MFVSR) {
14020  SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
14021  if (Reduced)
14022  return Reduced;
14023  }
14024 
14025  // If we're building a vector out of consecutive loads, just load that
14026  // vector type.
14027  SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
14028  if (Reduced)
14029  return Reduced;
14030 
14031  // If we're building a vector out of extended elements from another vector
14032  // we have P9 vector integer extend instructions. The code assumes legal
14033  // input types (i.e. it can't handle things like v4i16) so do not run before
14034  // legalization.
14035  if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
14036  Reduced = combineBVOfVecSExt(N, DAG);
14037  if (Reduced)
14038  return Reduced;
14039  }
14040 
14041  // On Power10, the Load VSX Vector Rightmost instructions can be utilized
14042  // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
14043  // is a load from <valid narrow width> to i128.
14044  if (Subtarget.isISA3_1()) {
14045  SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
14046  if (BVOfZLoad)
14047  return BVOfZLoad;
14048  }
14049 
14050  if (N->getValueType(0) != MVT::v2f64)
14051  return SDValue();
14052 
14053  // Looking for:
14054  // (build_vector ([su]int_to_fp (extractelt 0)), ([su]int_to_fp (extractelt 1)))
14055  if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
14056  FirstInput.getOpcode() != ISD::UINT_TO_FP)
14057  return SDValue();
14058  if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
14059  N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
14060  return SDValue();
14061  if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
14062  return SDValue();
14063 
14064  SDValue Ext1 = FirstInput.getOperand(0);
14065  SDValue Ext2 = N->getOperand(1).getOperand(0);
14066  if (Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
14067  Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
14068  return SDValue();
14069 
14070  ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
14071  ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
14072  if (!Ext1Op || !Ext2Op)
14073  return SDValue();
14074  if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
14075  Ext1.getOperand(0) != Ext2.getOperand(0))
14076  return SDValue();
14077 
14078  int FirstElem = Ext1Op->getZExtValue();
14079  int SecondElem = Ext2Op->getZExtValue();
14080  int SubvecIdx;
14081  if (FirstElem == 0 && SecondElem == 1)
14082  SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
14083  else if (FirstElem == 2 && SecondElem == 3)
14084  SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
14085  else
14086  return SDValue();
14087 
14088  SDValue SrcVec = Ext1.getOperand(0);
14089  auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
14090  PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
14091  return DAG.getNode(NodeType, dl, MVT::v2f64,
14092  SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
14093 }
14094 
14095 SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
14096  DAGCombinerInfo &DCI) const {
14097  assert((N->getOpcode() == ISD::SINT_TO_FP ||
14098  N->getOpcode() == ISD::UINT_TO_FP) &&
14099  "Need an int -> FP conversion node here");
14100 
14101  if (useSoftFloat() || !Subtarget.has64BitSupport())
14102  return SDValue();
14103 
14104  SelectionDAG &DAG = DCI.DAG;
14105  SDLoc dl(N);
14106  SDValue Op(N, 0);
14107 
14108  // Don't handle ppc_fp128 here or conversions that are out-of-range capable
14109  // from the hardware.
14110  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
14111  return SDValue();
14112  if (!Op.getOperand(0).getValueType().isSimple())
14113  return SDValue();
14114  if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
14115  Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
14116  return SDValue();
14117 
14118  SDValue FirstOperand(Op.getOperand(0));
14119  bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
14120  (FirstOperand.getValueType() == MVT::i8 ||
14121  FirstOperand.getValueType() == MVT::i16);
14122  if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
14123  bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
14124  bool DstDouble = Op.getValueType() == MVT::f64;
14125  unsigned ConvOp = Signed ?
14126  (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
14127  (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
14128  SDValue WidthConst =
14129  DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
14130  dl, false);
14131  LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
14132  SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
14133  SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
14134  DAG.getVTList(MVT::f64, MVT::Other),
14135  Ops, MVT::i8, LDN->getMemOperand());
14136 
14137  // For signed conversion, we need to sign-extend the value in the VSR
14138  if (Signed) {
14139  SDValue ExtOps[] = { Ld, WidthConst };
14140  SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
14141  return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
14142  } else
14143  return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
14144  }
14145 
14146 
14147  // For i32 intermediate values, unfortunately, the conversion functions
14148  // leave the upper 32 bits of the value undefined. Within the set of
14149  // scalar instructions, we have no method for zero- or sign-extending the
14150  // value. Thus, we cannot handle i32 intermediate values here.
14151  if (Op.getOperand(0).getValueType() == MVT::i32)
14152  return SDValue();
14153 
14154  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
14155  "UINT_TO_FP is supported only with FPCVT");
14156 
14157  // If we have FCFIDS, then use it when converting to single-precision.
14158  // Otherwise, convert to double-precision and then round.
14159  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
14160  ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
14161  : PPCISD::FCFIDS)
14162  : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
14163  : PPCISD::FCFID);
14164  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
14165  ? MVT::f32
14166  : MVT::f64;
14167 
14168  // If we're converting from a float to an int, and back to a float again,
14169  // then we don't need the store/load pair at all.
14170  if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
14171  Subtarget.hasFPCVT()) ||
14172  (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
14173  SDValue Src = Op.getOperand(0).getOperand(0);
14174  if (Src.getValueType() == MVT::f32) {
14175  Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
14176  DCI.AddToWorklist(Src.getNode());
14177  } else if (Src.getValueType() != MVT::f64) {
14178  // Make sure that we don't pick up a ppc_fp128 source value.
14179  return SDValue();
14180  }
14181 
14182  unsigned FCTOp =
14183  Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
14184  PPCISD::FCTIDUZ;
14185 
14186  SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
14187  SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
14188 
14189  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
14190  FP = DAG.getNode(ISD::FP_ROUND, dl,
14191  MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
14192  DCI.AddToWorklist(FP.getNode());
14193  }
14194 
14195  return FP;
14196  }
14197 
14198  return SDValue();
14199 }
14200 
14201 // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
14202 // builtins) into loads with swaps.
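// For example (illustrative), on a little-endian subtarget that needs swaps:
//   (v4i32 (load %addr))
// becomes
//   (v4i32 (bitcast (PPCISD::XXSWAPD (PPCISD::LXVD2X %addr))))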
14203 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
14204  DAGCombinerInfo &DCI) const {
14205  SelectionDAG &DAG = DCI.DAG;
14206  SDLoc dl(N);
14207  SDValue Chain;
14208  SDValue Base;
14209  MachineMemOperand *MMO;
14210 
14211  switch (N->getOpcode()) {
14212  default:
14213  llvm_unreachable("Unexpected opcode for little endian VSX load");
14214  case ISD::LOAD: {
14215  LoadSDNode *LD = cast<LoadSDNode>(N);
14216  Chain = LD->getChain();
14217  Base = LD->getBasePtr();
14218  MMO = LD->getMemOperand();
14219  // If the MMO suggests this isn't a load of a full vector, leave
14220  // things alone. For a built-in, we have to make the change for
14221  // correctness, so if there is a size problem that will be a bug.
14222  if (MMO->getSize() < 16)
14223  return SDValue();
14224  break;
14225  }
14226  case ISD::INTRINSIC_W_CHAIN: {
14227  MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
14228  Chain = Intrin->getChain();
14229  // Similarly to the store case below, Intrin->getBasePtr() doesn't get
14230  // us what we want. Get operand 2 instead.
14231  Base = Intrin->getOperand(2);
14232  MMO = Intrin->getMemOperand();
14233  break;
14234  }
14235  }
14236 
14237  MVT VecTy = N->getValueType(0).getSimpleVT();
14238 
14239  // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
14240  // aligned and the type is a vector with elements up to 4 bytes
14241  if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
14242  VecTy.getScalarSizeInBits() <= 32) {
14243  return SDValue();
14244  }
14245 
14246  SDValue LoadOps[] = { Chain, Base };
14247  SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
14248  DAG.getVTList(MVT::v2f64, MVT::Other),
14249  LoadOps, MVT::v2f64, MMO);
14250 
14251  DCI.AddToWorklist(Load.getNode());
14252  Chain = Load.getValue(1);
14253  SDValue Swap = DAG.getNode(
14254  PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
14255  DCI.AddToWorklist(Swap.getNode());
14256 
14257  // Add a bitcast if the resulting load type doesn't match v2f64.
14258  if (VecTy != MVT::v2f64) {
14259  SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
14260  DCI.AddToWorklist(N.getNode());
14261  // Package {bitcast value, swap's chain} to match Load's shape.
14262  return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
14263  N, Swap.getValue(1));
14264  }
14265 
14266  return Swap;
14267 }
14268 
14269 // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
14270 // builtins) into stores with swaps.
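// For example (illustrative), on a little-endian subtarget that needs swaps:
//   (store v4i32:%val, %addr)
// becomes
//   (PPCISD::STXVD2X (PPCISD::XXSWAPD (v2f64 (bitcast %val))), %addr)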
14271 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
14272  DAGCombinerInfo &DCI) const {
14273  SelectionDAG &DAG = DCI.DAG;
14274  SDLoc dl(N);
14275  SDValue Chain;
14276  SDValue Base;
14277  unsigned SrcOpnd;
14278  MachineMemOperand *MMO;
14279 
14280  switch (N->getOpcode()) {
14281  default:
14282  llvm_unreachable("Unexpected opcode for little endian VSX store");
14283  case ISD::STORE: {
14284  StoreSDNode *ST = cast<StoreSDNode>(N);
14285  Chain = ST->getChain();
14286  Base = ST->getBasePtr();
14287  MMO = ST->getMemOperand();
14288  SrcOpnd = 1;
14289  // If the MMO suggests this isn't a store of a full vector, leave
14290  // things alone. For a built-in, we have to make the change for
14291  // correctness, so if there is a size problem that will be a bug.
14292  if (MMO->getSize() < 16)
14293  return SDValue();
14294  break;
14295  }
14296  case ISD::INTRINSIC_VOID: {
14297  MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
14298  Chain = Intrin->getChain();
14299  // Intrin->getBasePtr() oddly does not get what we want.
14300  Base = Intrin->getOperand(3);
14301  MMO = Intrin->getMemOperand();
14302  SrcOpnd = 2;
14303  break;
14304  }
14305  }
14306 
14307  SDValue Src = N->getOperand(SrcOpnd);
14308  MVT VecTy = Src.getValueType().getSimpleVT();
14309 
14310  // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the store is
14311  // aligned and the type is a vector with elements up to 4 bytes
14312  if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
14313  VecTy.getScalarSizeInBits() <= 32) {
14314  return SDValue();
14315  }
14316 
14317  // All stores are done as v2f64 and possible bit cast.
14318  if (VecTy != MVT::v2f64) {
14319  Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
14320  DCI.AddToWorklist(Src.getNode());
14321  }
14322 
14323  SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
14324  DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
14325  DCI.AddToWorklist(Swap.getNode());
14326  Chain = Swap.getValue(1);
14327  SDValue StoreOps[] = { Chain, Swap, Base };
14328  SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
14329  DAG.getVTList(MVT::Other),
14330  StoreOps, VecTy, MMO);
14331  DCI.AddToWorklist(Store.getNode());
14332  return Store;
14333 }
14334 
14335 // Handle DAG combine for STORE (FP_TO_INT F).
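// For example (illustrative): (store (fp_to_sint f64:%f), %addr) can be
// emitted as a direct store from the VSR holding the converted value
// (PPCISD::ST_VSR_SCAL_INT), avoiding a round trip through a GPR.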
14336 SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
14337  DAGCombinerInfo &DCI) const {
14338 
14339  SelectionDAG &DAG = DCI.DAG;
14340  SDLoc dl(N);
14341  unsigned Opcode = N->getOperand(1).getOpcode();
14342 
14343  assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT)
14344  && "Not a FP_TO_INT Instruction!");
14345 
14346  SDValue Val = N->getOperand(1).getOperand(0);
14347  EVT Op1VT = N->getOperand(1).getValueType();
14348  EVT ResVT = Val.getValueType();
14349 
14350  if (!isTypeLegal(ResVT))
14351  return SDValue();
14352 
14353  // Only perform combine for conversion to i64/i32 or power9 i16/i8.
14354  bool ValidTypeForStoreFltAsInt =
14355  (Op1VT == MVT::i32 || Op1VT == MVT::i64 ||
14356  (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
14357 
14358  if (ResVT == MVT::f128 && !Subtarget.hasP9Vector())
14359  return SDValue();
14360 
14361  if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Vector() ||
14362  cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
14363  return SDValue();
14364 
14365  // Extend f32 values to f64
14366  if (ResVT.getScalarSizeInBits() == 32) {
14367  Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
14368  DCI.AddToWorklist(Val.getNode());
14369  }
14370 
14371  // Set signed or unsigned conversion opcode.
14372  unsigned ConvOpcode = (Opcode == ISD::FP_TO_SINT) ?
14373  PPCISD::FP_TO_SINT_IN_VSR :
14374  PPCISD::FP_TO_UINT_IN_VSR;
14375 
14376  Val = DAG.getNode(ConvOpcode,
14377  dl, ResVT == MVT::f128 ? MVT::f128 : MVT::f64, Val);
14378  DCI.AddToWorklist(Val.getNode());
14379 
14380  // Set number of bytes being converted.
14381  unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
14382  SDValue Ops[] = { N->getOperand(0), Val, N->getOperand(2),
14383  DAG.getIntPtrConstant(ByteSize, dl, false),
14384  DAG.getValueType(Op1VT) };
14385 
14386  Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
14387  DAG.getVTList(MVT::Other), Ops,
14388  cast<StoreSDNode>(N)->getMemoryVT(),
14389  cast<StoreSDNode>(N)->getMemOperand());
14390 
14391  DCI.AddToWorklist(Val.getNode());
14392  return Val;
14393 }
14394 
14395 static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
14396  // Check that the source of the element keeps flipping
14397  // (i.e. Mask[i] < NumElts -> Mask[i+1] >= NumElts).
14398  bool PrevElemFromFirstVec = Mask[0] < NumElts;
14399  for (int i = 1, e = Mask.size(); i < e; i++) {
14400  if (PrevElemFromFirstVec && Mask[i] < NumElts)
14401  return false;
14402  if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
14403  return false;
14404  PrevElemFromFirstVec = !PrevElemFromFirstVec;
14405  }
14406  return true;
14407 }
14408 
14409 static bool isSplatBV(SDValue Op) {
14410  if (Op.getOpcode() != ISD::BUILD_VECTOR)
14411  return false;
14412  SDValue FirstOp;
14413 
14414  // Find first non-undef input.
14415  for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
14416  FirstOp = Op.getOperand(i);
14417  if (!FirstOp.isUndef())
14418  break;
14419  }
14420 
14421  // All inputs are undef or the same as the first non-undef input.
14422  for (int i = 1, e = Op.getNumOperands(); i < e; i++)
14423  if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
14424  return false;
14425  return true;
14426 }
14427 
14428 static SDValue isScalarToVec(SDValue Op) {
14429  if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
14430  return Op;
14431  if (Op.getOpcode() != ISD::BITCAST)
14432  return SDValue();
14433  Op = Op.getOperand(0);
14434  if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
14435  return Op;
14436  return SDValue();
14437 }
14438 
14439 // Fix up the shuffle mask to account for the fact that the result of
14440 // scalar_to_vector is not in lane zero. This just takes all values in
14441 // the ranges specified by the min/max indices and adds the number of
14442 // elements required to ensure each element comes from the respective
14443 // position in the valid lane.
14444 // On little endian, that's just the corresponding element in the other
14445 // half of the vector. On big endian, it is in the same half but right
14446 // justified rather than left justified in that half.
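// For example (illustrative): with v2f64 on little endian, HalfVec is 1 and a
// permuted scalar_to_vector leaves the value in doubleword 1, so a mask index
// of 0 referring to that input is rewritten to 1.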
14447 static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
14448  int LHSMaxIdx, int RHSMinIdx,
14449  int RHSMaxIdx, int HalfVec,
14450  unsigned ValidLaneWidth,
14451  const PPCSubtarget &Subtarget) {
14452  for (int i = 0, e = ShuffV.size(); i < e; i++) {
14453  int Idx = ShuffV[i];
14454  if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
14455  ShuffV[i] +=
14456  Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
14457  }
14458 }
14459 
14460 // Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
14461 // the original is:
14462 // (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
14463 // In such a case, just change the shuffle mask to extract the element
14464 // from the permuted index.
14465 static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
14466  const PPCSubtarget &Subtarget) {
14467  SDLoc dl(OrigSToV);
14468  EVT VT = OrigSToV.getValueType();
14469  assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
14470  "Expecting a SCALAR_TO_VECTOR here");
14471  SDValue Input = OrigSToV.getOperand(0);
14472 
14473  if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
14474  ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
14475  SDValue OrigVector = Input.getOperand(0);
14476 
14477  // Can't handle non-const element indices or different vector types
14478  // for the input to the extract and the output of the scalar_to_vector.
14479  if (Idx && VT == OrigVector.getValueType()) {
14480  unsigned NumElts = VT.getVectorNumElements();
14481  assert(
14482  NumElts > 1 &&
14483  "Cannot produce a permuted scalar_to_vector for one element vector");
14484  SmallVector<int, 16> NewMask(NumElts, -1);
14485  unsigned ResultInElt = NumElts / 2;
14486  ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
14487  NewMask[ResultInElt] = Idx->getZExtValue();
14488  return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
14489  }
14490  }
14491  return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
14492  OrigSToV.getOperand(0));
14493 }
14494 
14495 // On little endian subtargets, combine shuffles such as:
14496 // vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
14497 // into:
14498 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
14499 // because the latter can be matched to a single instruction merge.
14500 // Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
14501 // to put the value into element zero. Adjust the shuffle mask so that the
14502 // vector can remain in permuted form (to prevent a swap prior to a shuffle).
14503 // On big endian targets, this is still useful for SCALAR_TO_VECTOR
14504 // nodes with elements smaller than doubleword because all the ways
14505 // of getting scalar data into a vector register put the value in the
14506 // rightmost element of the left half of the vector.
14507 SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
14508  SelectionDAG &DAG) const {
14509  SDValue LHS = SVN->getOperand(0);
14510  SDValue RHS = SVN->getOperand(1);
14511  auto Mask = SVN->getMask();
14512  int NumElts = LHS.getValueType().getVectorNumElements();
14513  SDValue Res(SVN, 0);
14514  SDLoc dl(SVN);
14515  bool IsLittleEndian = Subtarget.isLittleEndian();
14516 
14517  // On big endian targets this is only useful for subtargets with direct moves.
14518  // On little endian targets it would be useful for all subtargets with VSX.
14519  // However adding special handling for LE subtargets without direct moves
14520  // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
14521  // which includes direct moves.
14522  if (!Subtarget.hasDirectMove())
14523  return Res;
14524 
14525  // If this is not a shuffle of a shuffle and the first element comes from
14526  // the second vector, canonicalize to the commuted form. This will make it
14527  // more likely to match one of the single instruction patterns.
14528  if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
14529  RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
14530  std::swap(LHS, RHS);
14531  Res = DAG.getCommutedVectorShuffle(*SVN);
14532  Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
14533  }
14534 
14535  // Adjust the shuffle mask if either input vector comes from a
14536  // SCALAR_TO_VECTOR and keep the respective input vector in permuted
14537  // form (to prevent the need for a swap).
14538  SmallVector<int, 16> ShuffV(Mask.begin(), Mask.end());
14539  SDValue SToVLHS = isScalarToVec(LHS);
14540  SDValue SToVRHS = isScalarToVec(RHS);
14541  if (SToVLHS || SToVRHS) {
14542  int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
14543  : SToVRHS.getValueType().getVectorNumElements();
14544  int NumEltsOut = ShuffV.size();
14545  // The width of the "valid lane" (i.e. the lane that contains the value that
14546  // is vectorized) needs to be expressed in terms of the number of elements
14547  // of the shuffle. It is thereby the ratio of the values before and after
14548  // any bitcast.
14549  unsigned ValidLaneWidth =
14550  SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
14551  SVN->getValueType(0).getScalarSizeInBits()
14552  : SToVRHS.getValueType().getScalarSizeInBits() /
14553  SVN->getValueType(0).getScalarSizeInBits();
14554 
14555  // Initially assume that neither input is permuted. These will be adjusted
14556  // accordingly if either input is.
14557  int LHSMaxIdx = -1;
14558  int RHSMinIdx = -1;
14559  int RHSMaxIdx = -1;
14560  int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
14561 
14562  // Get the permuted scalar to vector nodes for the source(s) that come from
14563  // ISD::SCALAR_TO_VECTOR.
14564  // On big endian systems, this only makes sense for element sizes smaller
14565  // than 64 bits since for 64-bit elements, all instructions already put
14566  // the value into element zero. Since scalar size of LHS and RHS may differ
14567  // after isScalarToVec, this should be checked using their own sizes.
14568  if (SToVLHS) {
14569  if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
14570  return Res;
14571  // Set up the values for the shuffle vector fixup.
14572  LHSMaxIdx = NumEltsOut / NumEltsIn;
14573  SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
14574  if (SToVLHS.getValueType() != LHS.getValueType())
14575  SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
14576  LHS = SToVLHS;
14577  }
14578  if (SToVRHS) {
14579  if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
14580  return Res;
14581  RHSMinIdx = NumEltsOut;
14582  RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
14583  SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
14584  if (SToVRHS.getValueType() != RHS.getValueType())
14585  SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
14586  RHS = SToVRHS;
14587  }
14588 
14589  // Fix up the shuffle mask to reflect where the desired element actually is.
14590  // The minimum and maximum indices that correspond to element zero for both
14591  // the LHS and RHS are computed and will control which shuffle mask entries
14592  // are to be changed. For example, if the RHS is permuted, any shuffle mask
14593  // entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
14594  fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
14595  HalfVec, ValidLaneWidth, Subtarget);
14596  Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
14597 
14598  // We may have simplified away the shuffle. We won't be able to do anything
14599  // further with it here.
14600  if (!isa<ShuffleVectorSDNode>(Res))
14601  return Res;
14602  Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
14603  }
14604 
14605  SDValue TheSplat = IsLittleEndian ? RHS : LHS;
14606  // The common case after we commuted the shuffle is that the RHS is a splat
14607  // and we have elements coming in from the splat at indices that are not
14608  // conducive to using a merge.
14609  // Example:
14610  // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
14611  if (!isSplatBV(TheSplat))
14612  return Res;
14613 
14614  // We are looking for a mask such that all even elements are from
14615  // one vector and all odd elements from the other.
14616  if (!isAlternatingShuffMask(Mask, NumElts))
14617  return Res;
14618 
14619  // Adjust the mask so we are pulling in the same index from the splat
14620  // as the index from the interesting vector in consecutive elements.
14621  if (IsLittleEndian) {
14622  // Example (even elements from first vector):
14623  // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
14624  if (Mask[0] < NumElts)
14625  for (int i = 1, e = Mask.size(); i < e; i += 2)
14626  ShuffV[i] = (ShuffV[i - 1] + NumElts);
14627  // Example (odd elements from first vector):
14628  // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
14629  else
14630  for (int i = 0, e = Mask.size(); i < e; i += 2)
14631  ShuffV[i] = (ShuffV[i + 1] + NumElts);
14632  } else {
14633  // Example (even elements from first vector):
14634  // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
14635  if (Mask[0] < NumElts)
14636  for (int i = 0, e = Mask.size(); i < e; i += 2)
14637  ShuffV[i] = ShuffV[i + 1] - NumElts;
14638  // Example (odd elements from first vector):
14639  // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
14640  else
14641  for (int i = 1, e = Mask.size(); i < e; i += 2)
14642  ShuffV[i] = ShuffV[i - 1] - NumElts;
14643  }
14644 
14645  // If the RHS has undefs, we need to remove them since we may have created
14646  // a shuffle that adds those instead of the splat value.
14647  SDValue SplatVal =
14648  cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
14649  TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);
14650 
14651  if (IsLittleEndian)
14652  RHS = TheSplat;
14653  else
14654  LHS = TheSplat;
14655  return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
14656 }
14657 
14658 SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
14659  LSBaseSDNode *LSBase,
14660  DAGCombinerInfo &DCI) const {
14661  assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
14662  "Not a reverse memop pattern!");
14663 
14664  auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
14665  auto Mask = SVN->getMask();
14666  int i = 0;
14667  auto I = Mask.rbegin();
14668  auto E = Mask.rend();
14669 
14670  for (; I != E; ++I) {
14671  if (*I != i)
14672  return false;
14673  i++;
14674  }
14675  return true;
14676  };
14677 
14678  SelectionDAG &DAG = DCI.DAG;
14679  EVT VT = SVN->getValueType(0);
14680 
14681  if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
14682  return SDValue();
14683 
14684  // Before P9, we have the PPCVSXSwapRemoval pass to hack the element order.
14685  // See the comment in PPCVSXSwapRemoval.cpp.
14686  // This transform conflicts with that optimization, so we don't do it here.
14687  if (!Subtarget.hasP9Vector())
14688  return SDValue();
14689 
14690  if (!IsElementReverse(SVN))
14691  return SDValue();
14692 
14693  if (LSBase->getOpcode() == ISD::LOAD) {
14694  // If result 0 of the load has any user other than the
14695  // shufflevector instruction, it is not profitable to replace the
14696  // shufflevector with a reverse load.
14697  for (SDNode::use_iterator UI = LSBase->use_begin(), UE = LSBase->use_end();
14698  UI != UE; ++UI)
14699  if (UI.getUse().getResNo() == 0 && UI->getOpcode() != ISD::VECTOR_SHUFFLE)
14700  return SDValue();
14701 
14702  SDLoc dl(LSBase);
14703  SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
14704  return DAG.getMemIntrinsicNode(
14705  PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
14706  LSBase->getMemoryVT(), LSBase->getMemOperand());
14707  }
14708 
14709  if (LSBase->getOpcode() == ISD::STORE) {
14710  // If there are other uses of the shuffle, the swap cannot be avoided.
14711  // Forcing the use of an X-Form (since swapped stores only have
14712  // X-Forms) without removing the swap is unprofitable.
14713  if (!SVN->hasOneUse())
14714  return SDValue();
14715 
14716  SDLoc dl(LSBase);
14717  SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
14718  LSBase->getBasePtr()};
14719  return DAG.getMemIntrinsicNode(
14720  PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
14721  LSBase->getMemoryVT(), LSBase->getMemOperand());
14722  }
14723 
14724  llvm_unreachable("Expected a load or store node here");
14725 }
14726 
14727 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
14728  DAGCombinerInfo &DCI) const {
14729  SelectionDAG &DAG = DCI.DAG;
14730  SDLoc dl(N);
14731  switch (N->getOpcode()) {
14732  default: break;
14733  case ISD::ADD:
14734  return combineADD(N, DCI);
14735  case ISD::SHL:
14736  return combineSHL(N, DCI);
14737  case ISD::SRA:
14738  return combineSRA(N, DCI);
14739  case ISD::SRL:
14740  return combineSRL(N, DCI);
14741  case ISD::MUL:
14742  return combineMUL(N, DCI);
14743  case ISD::FMA:
14744  case PPCISD::FNMSUB:
14745  return combineFMALike(N, DCI);
14746  case PPCISD::SHL:
14747  if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
14748  return N->getOperand(0);
14749  break;
14750  case PPCISD::SRL:
14751  if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
14752  return N->getOperand(0);
14753  break;
14754  case PPCISD::SRA:
14755  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
14756  if (C->isNullValue() || // 0 >>s V -> 0.
14757  C->isAllOnesValue()) // -1 >>s V -> -1.
14758  return N->getOperand(0);
14759  }
14760  break;
14761  case ISD::SIGN_EXTEND:
14762  case ISD::ZERO_EXTEND:
14763  case ISD::ANY_EXTEND:
14764  return DAGCombineExtBoolTrunc(N, DCI);
14765  case ISD::TRUNCATE:
14766  return combineTRUNCATE(N, DCI);
14767  case ISD::SETCC:
14768  if (SDValue CSCC = combineSetCC(N, DCI))
14769  return CSCC;
14770  LLVM_FALLTHROUGH;
14771  case ISD::SELECT_CC:
14772  return DAGCombineTruncBoolExt(N, DCI);
14773  case ISD::SINT_TO_FP:
14774  case ISD::UINT_TO_FP:
14775  return combineFPToIntToFP(N, DCI);
14776  case ISD::VECTOR_SHUFFLE:
14777  if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
14778  LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
14779  return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
14780  }
14781  return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
14782  case ISD::STORE: {
14783 
14784  EVT Op1VT = N->getOperand(1).getValueType();
14785  unsigned Opcode = N->getOperand(1).getOpcode();
14786 
14787  if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) {
14788  SDValue Val= combineStoreFPToInt(N, DCI);
14789  if (Val)
14790  return Val;
14791  }
14792 
14793  if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
14794  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
14795  SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
14796  if (Val)
14797  return Val;
14798  }
14799 
14800  // Turn STORE (BSWAP) -> sthbrx/stwbrx.
14801  if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
14802  N->getOperand(1).getNode()->hasOneUse() &&
14803  (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
14804  (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
14805 
14806  // STBRX can only handle simple types and it makes no sense to store fewer
14807  // than two bytes in byte-reversed order.
14808  EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
14809  if (mVT.isExtended() || mVT.getSizeInBits() < 16)
14810  break;
14811 
14812  SDValue BSwapOp = N->getOperand(1).getOperand(0);
14813  // Do an any-extend to 32-bits if this is a half-word input.
14814  if (BSwapOp.getValueType() == MVT::i16)
14815  BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
14816 
14817  // If the type of the BSWAP operand is wider than the stored memory width,
14818  // it needs to be shifted to the right side before STBRX.
14819  if (Op1VT.bitsGT(mVT)) {
14820  int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
14821  BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
14822  DAG.getConstant(Shift, dl, MVT::i32));
14823  // Need to truncate if this is a bswap of i64 stored as i32/i16.
14824  if (Op1VT == MVT::i64)
14825  BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
14826  }
14827 
14828  SDValue Ops[] = {
14829  N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
14830  };
14831  return
14832  DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
14833  Ops, cast<StoreSDNode>(N)->getMemoryVT(),
14834  cast<StoreSDNode>(N)->getMemOperand());
14835  }
14836 
14837  // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
14838  // So it can increase the chance of CSE constant construction.
14839  if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
14840  isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
14841  // Need to sign-extend to 64 bits to handle negative values.
14842  EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
14843  uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
14844  MemVT.getSizeInBits());
14845  SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
14846 
14847  // DAG.getTruncStore() can't be used here because it doesn't accept
14848  // the general (base + offset) addressing mode.
14849  // So we use UpdateNodeOperands and setTruncatingStore instead.
14850  DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
14851  N->getOperand(3));
14852  cast<StoreSDNode>(N)->setTruncatingStore(true);
14853  return SDValue(N, 0);
14854  }
14855 
14856  // For little endian, VSX stores require generating xxswapd/stxvd2x.
14857  // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
14858  if (Op1VT.isSimple()) {
14859  MVT StoreVT = Op1VT.getSimpleVT();
14860  if (Subtarget.needsSwapsForVSXMemOps() &&
14861  (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
14862  StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
14863  return expandVSXStoreForLE(N, DCI);
14864  }
14865  break;
14866  }
14867  case ISD::LOAD: {
14868  LoadSDNode *LD = cast<LoadSDNode>(N);
14869  EVT VT = LD->getValueType(0);
14870 
14871  // For little endian, VSX loads require generating lxvd2x/xxswapd.
14872  // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
14873  if (VT.isSimple()) {
14874  MVT LoadVT = VT.getSimpleVT();
14875  if (Subtarget.needsSwapsForVSXMemOps() &&
14876  (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
14877  LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
14878  return expandVSXLoadForLE(N, DCI);
14879  }
14880 
14881  // We sometimes end up with a 64-bit integer load, from which we extract
14882  // two single-precision floating-point numbers. This happens with
14883  // std::complex<float>, and other similar structures, because of the way we
14884  // canonicalize structure copies. However, if we lack direct moves,
14885  // then the final bitcasts from the extracted integer values to the
14886  // floating-point numbers turn into store/load pairs. Even with direct moves,
14887  // just loading the two floating-point numbers is likely better.
14888  auto ReplaceTwoFloatLoad = [&]() {
14889  if (VT != MVT::i64)
14890  return false;
14891 
14892  if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
14893  LD->isVolatile())
14894  return false;
14895 
14896  // We're looking for a sequence like this:
14897  // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
14898  // t16: i64 = srl t13, Constant:i32<32>
14899  // t17: i32 = truncate t16
14900  // t18: f32 = bitcast t17
14901  // t19: i32 = truncate t13
14902  // t20: f32 = bitcast t19
14903 
14904  if (!LD->hasNUsesOfValue(2, 0))
14905  return false;
14906 
14907  auto UI = LD->use_begin();
14908  while (UI.getUse().getResNo() != 0) ++UI;
14909  SDNode *Trunc = *UI++;
14910  while (UI.getUse().getResNo() != 0) ++UI;
14911  SDNode *RightShift = *UI;
14912  if (Trunc->getOpcode() != ISD::TRUNCATE)
14913  std::swap(Trunc, RightShift);
14914 
14915  if (Trunc->getOpcode() != ISD::TRUNCATE ||
14916  Trunc->getValueType(0) != MVT::i32 ||
14917  !Trunc->hasOneUse())
14918  return false;
14919  if (RightShift->getOpcode() != ISD::SRL ||
14920  !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
14921  RightShift->getConstantOperandVal(1) != 32 ||
14922  !RightShift->hasOneUse())
14923  return false;
14924 
14925  SDNode *Trunc2 = *RightShift->use_begin();
14926  if (Trunc2->getOpcode() != ISD::TRUNCATE ||
14927  Trunc2->getValueType(0) != MVT::i32 ||
14928  !Trunc2->hasOneUse())
14929  return false;
14930 
14931  SDNode *Bitcast = *Trunc->use_begin();
14932  SDNode *Bitcast2 = *Trunc2->use_begin();
14933 
14934  if (Bitcast->getOpcode() != ISD::BITCAST ||
14935  Bitcast->getValueType(0) != MVT::f32)
14936  return false;
14937  if (Bitcast2->getOpcode() != ISD::BITCAST ||
14938  Bitcast2->getValueType(0) != MVT::f32)
14939  return false;
14940 
14941  if (Subtarget.isLittleEndian())
14942  std::swap(Bitcast, Bitcast2);
14943 
14944  // Bitcast has the second float (in memory-layout order) and Bitcast2
14945  // has the first one.
14946 
14947  SDValue BasePtr = LD->getBasePtr();
14948  if (LD->isIndexed()) {
14949  assert(LD->getAddressingMode() == ISD::PRE_INC &&
14950  "Non-pre-inc AM on PPC?");
14951  BasePtr =
14952  DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
14953  LD->getOffset());
14954  }
14955 
14956  auto MMOFlags =
14957  LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
14958  SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
14959  LD->getPointerInfo(), LD->getAlignment(),
14960  MMOFlags, LD->getAAInfo());
14961  SDValue AddPtr =
14962  DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
14963  BasePtr, DAG.getIntPtrConstant(4, dl));
14964  SDValue FloatLoad2 = DAG.getLoad(
14965  MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
14966  LD->getPointerInfo().getWithOffset(4),
14967  MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo());
14968 
14969  if (LD->isIndexed()) {
14970  // Note that DAGCombine should re-form any pre-increment load(s) from
14971  // what is produced here if that makes sense.
14972  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
14973  }
14974 
14975  DCI.CombineTo(Bitcast2, FloatLoad);
14976  DCI.CombineTo(Bitcast, FloatLoad2);
14977 
14978  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
14979  SDValue(FloatLoad2.getNode(), 1));
14980  return true;
14981  };
14982 
14983  if (ReplaceTwoFloatLoad())
14984  return SDValue(N, 0);
14985 
14986  EVT MemVT = LD->getMemoryVT();
14987  Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
14988  Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
14989  if (LD->isUnindexed() && VT.isVector() &&
14990  ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
14991  // P8 and later hardware should just use LOAD.
14992  !Subtarget.hasP8Vector() &&
14993  (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
14994  VT == MVT::v4f32))) &&
14995  LD->getAlign() < ABIAlignment) {
14996  // This is a type-legal unaligned Altivec load.
14997  SDValue Chain = LD->getChain();
14998  SDValue Ptr = LD->getBasePtr();
14999  bool isLittleEndian = Subtarget.isLittleEndian();
15000 
15001  // This implements the loading of unaligned vectors as described in
15002  // the venerable Apple Velocity Engine overview. Specifically:
15003  // https://developer.apple.com/hardwaredrivers/ve/alignment.html
15004  // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
15005  //
15006  // The general idea is to expand a sequence of one or more unaligned
15007  // loads into an alignment-based permutation-control instruction (lvsl
15008  // or lvsr), a series of regular vector loads (which always truncate
15009  // their input address to an aligned address), and a series of
15010  // permutations. The results of these permutations are the requested
15011  // loaded values. The trick is that the last "extra" load is not taken
15012  // from the address you might suspect (sizeof(vector) bytes after the
15013  // last requested load), but rather sizeof(vector) - 1 bytes after the
15014  // last requested vector. The point of this is to avoid a page fault if
15015  // the base address happened to be aligned. This works because if the
15016  // base address is aligned, then adding less than a full vector length
15017  // will cause the last vector in the sequence to be (re)loaded.
15018  // Otherwise, the next vector will be fetched as you might suspect was
15019  // necessary.
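 // As an illustrative sketch (big-endian case), an unaligned v4i32 load
 // expands roughly to:
 //   %cntl  = lvsl(%addr)
 //   %base  = lvx(%addr)
 //   %extra = lvx(%addr + 15)
 //   %val   = vperm(%base, %extra, %cntl)
 // (on little endian, lvsr is used and the vperm operands are reversed).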
15020 
15021  // We might be able to reuse the permutation generation from
15022  // a different base address offset from this one by an aligned amount.
15023  // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
15024  // optimization later.
15025  Intrinsic::ID Intr, IntrLD, IntrPerm;
15026  MVT PermCntlTy, PermTy, LDTy;
15027  Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
15028  : Intrinsic::ppc_altivec_lvsl;
15029  IntrLD = Intrinsic::ppc_altivec_lvx;
15030  IntrPerm = Intrinsic::ppc_altivec_vperm;
15031  PermCntlTy = MVT::v16i8;
15032  PermTy = MVT::v4i32;
15033  LDTy = MVT::v4i32;
15034 
15035  SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
15036 
15037  // Create the new MMO for the new base load. It is like the original MMO,
15038  // but represents an area in memory almost twice the vector size centered
15039  // on the original address. If the address is unaligned, we might start
15040  // reading up to (sizeof(vector)-1) bytes below the address of the
15041  // original unaligned load.
15042  MachineFunction &MF = DAG.getMachineFunction();
15043  MachineMemOperand *BaseMMO =
15044  MF.getMachineMemOperand(LD->getMemOperand(),
15045  -(long)MemVT.getStoreSize()+1,
15046  2*MemVT.getStoreSize()-1);
15047 
15048  // Create the new base load.
15049  SDValue LDXIntID =
15050  DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
15051  SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
15052  SDValue BaseLoad =
15053  DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
15054  DAG.getVTList(PermTy, MVT::Other),
15055  BaseLoadOps, LDTy, BaseMMO);
15056 
15057  // Note that the value of IncOffset (which is provided to the next
15058  // load's pointer info offset value, and thus used to calculate the
15059  // alignment), and the value of IncValue (which is actually used to
15060  // increment the pointer value) are different! This is because we
15061  // require the next load to appear to be aligned, even though it
15062  // is actually offset from the base pointer by a lesser amount.
15063  int IncOffset = VT.getSizeInBits() / 8;
15064  int IncValue = IncOffset;
15065 
15066  // Walk (both up and down) the chain looking for another load at the real
15067  // (aligned) offset (the alignment of the other load does not matter in
15068  // this case). If found, then do not use the offset reduction trick, as
15069  // that will prevent the loads from being later combined (as they would
15070  // otherwise be duplicates).
15071  if (!findConsecutiveLoad(LD, DAG))
15072  --IncValue;
15073 
15074  SDValue Increment =
15075  DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
15076  Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
15077 
15078  MachineMemOperand *ExtraMMO =
15079  MF.getMachineMemOperand(LD->getMemOperand(),
15080  1, 2*MemVT.getStoreSize()-1);
15081  SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
15082  SDValue ExtraLoad =
15083  DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
15084  DAG.getVTList(PermTy, MVT::Other),
15085  ExtraLoadOps, LDTy, ExtraMMO);
15086 
15087  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
15088  BaseLoad.getValue(1), ExtraLoad.getValue(1));
15089 
15090  // Because vperm has a big-endian bias, we must reverse the order
15091  // of the input vectors and complement the permute control vector
15092  // when generating little endian code. We have already handled the
15093  // latter by using lvsr instead of lvsl, so just reverse BaseLoad
15094  // and ExtraLoad here.
15095  SDValue Perm;
15096  if (isLittleEndian)
15097  Perm = BuildIntrinsicOp(IntrPerm,
15098  ExtraLoad, BaseLoad, PermCntl, DAG, dl);
15099  else
15100  Perm = BuildIntrinsicOp(IntrPerm,
15101  BaseLoad, ExtraLoad, PermCntl, DAG, dl);
15102 
15103  if (VT != PermTy)
15104  Perm = Subtarget.hasAltivec()
15105  ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
15106  : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
15107  DAG.getTargetConstant(1, dl, MVT::i64));
15108  // second argument is 1 because this rounding
15109  // is always exact.
15110 
15111  // The output of the permutation is our loaded result, the TokenFactor is
15112  // our new chain.
15113  DCI.CombineTo(N, Perm, TF);
15114  return SDValue(N, 0);
15115  }
15116  }
15117  break;
15118  case ISD::INTRINSIC_WO_CHAIN: {
15119  bool isLittleEndian = Subtarget.isLittleEndian();
15120  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
15121  Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
15122  : Intrinsic::ppc_altivec_lvsl);
15123  if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
15124  SDValue Add = N->getOperand(1);
15125 
15126  int Bits = 4 /* 16 byte alignment */;
15127 
15128  if (DAG.MaskedValueIsZero(Add->getOperand(1),
15129  APInt::getAllOnesValue(Bits /* alignment */)
15130  .zext(Add.getScalarValueSizeInBits()))) {
15131  SDNode *BasePtr = Add->getOperand(0).getNode();
15132  for (SDNode::use_iterator UI = BasePtr->use_begin(),
15133  UE = BasePtr->use_end();
15134  UI != UE; ++UI) {
15135  if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15136  cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
15137  IID) {
15138  // We've found another LVSL/LVSR, and this address is an aligned
15139  // multiple of that one. The results will be the same, so use the
15140  // one we've just found instead.
15141 
15142  return SDValue(*UI, 0);
15143  }
15144  }
15145  }
15146 
15147  if (isa<ConstantSDNode>(Add->getOperand(1))) {
15148  SDNode *BasePtr = Add->getOperand(0).getNode();
15149  for (SDNode::use_iterator UI = BasePtr->use_begin(),
15150  UE = BasePtr->use_end(); UI != UE; ++UI) {
15151  if (UI->getOpcode() == ISD::ADD &&
15152  isa<ConstantSDNode>(UI->getOperand(1)) &&
15153  (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
15154  cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
15155  (1ULL << Bits) == 0) {
15156  SDNode *OtherAdd = *UI;
15157  for (SDNode::use_iterator VI = OtherAdd->use_begin(),
15158  VE = OtherAdd->use_end(); VI != VE; ++VI) {
15159  if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15160  cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
15161  return SDValue(*VI, 0);
15162  }
15163  }
15164  }
15165  }
15166  }
15167  }
15168 
15169  // Combine vmaxsw/h/b(a, a's negation) to abs(a)
15170  // Expose the vabsduw/h/b opportunity to downstream combines.
15171  if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
15172  (IID == Intrinsic::ppc_altivec_vmaxsw ||
15173  IID == Intrinsic::ppc_altivec_vmaxsh ||
15174  IID == Intrinsic::ppc_altivec_vmaxsb)) {
15175  SDValue V1 = N->getOperand(1);
15176  SDValue V2 = N->getOperand(2);
15177  if ((V1.getSimpleValueType() == MVT::v4i32 ||
15178  V1.getSimpleValueType() == MVT::v8i16 ||
15179  V1.getSimpleValueType() == MVT::v16i8) &&
15180  V1.getSimpleValueType() == V2.getSimpleValueType()) {
15181  // (0-a, a)
15182  if (V1.getOpcode() == ISD::SUB &&
15183  ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
15184  V1.getOperand(1) == V2) {
15185  return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
15186  }
15187  // (a, 0-a)
15188  if (V2.getOpcode() == ISD::SUB &&
15189  ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
15190  V2.getOperand(1) == V1) {
15191  return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
15192  }
15193  // (x-y, y-x)
15194  if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
15195  V1.getOperand(0) == V2.getOperand(1) &&
15196  V1.getOperand(1) == V2.getOperand(0)) {
15197  return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
15198  }
15199  }
15200  }
15201  }
15202 
15203  break;
15204  case ISD::INTRINSIC_W_CHAIN:
15205  // For little endian, VSX loads require generating lxvd2x/xxswapd.
15206  // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
15207  if (Subtarget.needsSwapsForVSXMemOps()) {
15208  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
15209  default:
15210  break;
15211  case Intrinsic::ppc_vsx_lxvw4x:
15212  case Intrinsic::ppc_vsx_lxvd2x:
15213  return expandVSXLoadForLE(N, DCI);
15214  }
15215  }
15216  break;
15217  case ISD::INTRINSIC_VOID:
15218  // For little endian, VSX stores require generating xxswapd/stxvd2x.
15219  // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
15220  if (Subtarget.needsSwapsForVSXMemOps()) {
15221  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
15222  default:
15223  break;
15224  case Intrinsic::ppc_vsx_stxvw4x:
15225  case Intrinsic::ppc_vsx_stxvd2x:
15226  return expandVSXStoreForLE(N, DCI);
15227  }
15228  }
15229  break;
15230  case ISD::BSWAP: {
15231  // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
15232  // For subtargets without LDBRX, we can still do better than the default
15233  // expansion even for 64-bit BSWAP (LOAD).
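 // For example (illustrative): (i32 (bswap (load %addr))) becomes a
 // byte-reversed load (PPCISD::LBRX). Without LDBRX, a 64-bit bswap of a
 // load is split into two 32-bit byte-reversed loads joined by a BUILD_PAIR.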
15234  bool Is64BitBswapOn64BitTgt =
15235  Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
15236  bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
15237  N->getOperand(0).hasOneUse();
15238  if (IsSingleUseNormalLd &&
15239  (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
15240  (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
15241  SDValue Load = N->getOperand(0);
15242  LoadSDNode *LD = cast<LoadSDNode>(Load);
15243  // Create the byte-swapping load.
15244  SDValue Ops[] = {
15245  LD->getChain(), // Chain
15246  LD->getBasePtr(), // Ptr
15247  DAG.getValueType(N->getValueType(0)) // VT
15248  };
15249  SDValue BSLoad =
15250  DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
15251  DAG.getVTList(N->getValueType(0) == MVT::i64 ?
15252  MVT::i64 : MVT::i32, MVT::Other),
15253  Ops, LD->getMemoryVT(), LD->getMemOperand());
15254 
15255  // If this is an i16 load, insert the truncate.
15256  SDValue ResVal = BSLoad;
15257  if (N->getValueType(0) == MVT::i16)
15258  ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
15259 
15260  // First, combine the bswap away. This makes the value produced by the
15261  // load dead.
15262  DCI.CombineTo(N, ResVal);
15263 
15264  // Next, combine the load away; we give it a bogus result value but a real
15265  // chain result. The result value is dead because the bswap is dead.
15266  DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
15267 
15268  // Return N so it doesn't get rechecked!
15269  return SDValue(N, 0);
15270  }
15271  // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
15272  // before legalization so that the BUILD_PAIR is handled correctly.
15273  if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
15274  !IsSingleUseNormalLd)
15275  return SDValue();
15276  LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
15277 
15278  // Can't split volatile or atomic loads.
15279  if (!LD->isSimple())
15280  return SDValue();
15281  SDValue BasePtr = LD->getBasePtr();
15282  SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
15283  LD->getPointerInfo(), LD->getAlignment());
15284  Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
15285  BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
15286  DAG.getIntPtrConstant(4, dl));
15287  MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
15288  LD->getMemOperand(), 4, 4);
15289  SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
15290  Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
15291  SDValue Res;
15292  if (Subtarget.isLittleEndian())
15293  Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
15294  else
15295  Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
15296  SDValue TF =
15297  DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
15298  Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
15299  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
15300  return Res;
15301  }
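// Illustrative sketch (not part of the original listing): at the source level,
// a single-use pattern such as
//   uint32_t v = __builtin_bswap32(*p);
// is expected to hit the combine above and select to one byte-reversed load
// (lwbrx/lhbrx, or ldbrx when available) instead of a plain load followed by
// rotate/insert sequences; exact selection depends on the subtarget.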
15302  case PPCISD::VCMP:
15303  // If a VCMP_rec node already exists with exactly the same operands as this
15304  // node, use its result instead of this node (VCMP_rec computes both a CR6
15305  // and a normal output).
15306  //
15307  if (!N->getOperand(0).hasOneUse() &&
15308  !N->getOperand(1).hasOneUse() &&
15309  !N->getOperand(2).hasOneUse()) {
15310 
15311  // Scan all of the users of the LHS, looking for VCMP_rec's that match.
15312  SDNode *VCMPrecNode = nullptr;
15313 
15314  SDNode *LHSN = N->getOperand(0).getNode();
15315  for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
15316  UI != E; ++UI)
15317  if (UI->getOpcode() == PPCISD::VCMP_rec &&
15318  UI->getOperand(1) == N->getOperand(1) &&
15319  UI->getOperand(2) == N->getOperand(2) &&
15320  UI->getOperand(0) == N->getOperand(0)) {
15321  VCMPrecNode = *UI;
15322  break;
15323  }
15324 
15325  // If there is no VCMP_rec node, or if the flag value has a single use,
15326  // don't transform this.
15327  if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
15328  break;
15329 
15330  // Look at the (necessarily single) use of the flag value. If it has a
15331  // chain, this transformation is more complex. Note that multiple things
15332  // could use the value result, which we should ignore.
15333  SDNode *FlagUser = nullptr;
15334  for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
15335  FlagUser == nullptr; ++UI) {
15336  assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
15337  SDNode *User = *UI;
15338  for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
15339  if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
15340  FlagUser = User;
15341  break;
15342  }
15343  }
15344  }
15345 
15346  // If the user is a MFOCRF instruction, we know this is safe.
15347  // Otherwise we give up for right now.
15348  if (FlagUser->getOpcode() == PPCISD::MFOCRF)
15349  return SDValue(VCMPrecNode, 0);
15350  }
15351  break;
15352  case ISD::BRCOND: {
15353  SDValue Cond = N->getOperand(1);
15354  SDValue Target = N->getOperand(2);
15355 
15356  if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15357  cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
15358  Intrinsic::loop_decrement) {
15359 
15360  // We now need to make the intrinsic dead (it cannot be instruction
15361  // selected).
15362  DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
15363  assert(Cond.getNode()->hasOneUse() &&
15364  "Counter decrement has more than one use");
15365 
15366  return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
15367  N->getOperand(0), Target);
15368  }
15369  }
15370  break;
15371  case ISD::BR_CC: {
15372  // If this is a branch on an altivec predicate comparison, lower this so
15373  // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
15374  // lowering is done pre-legalize, because the legalizer lowers the predicate
15375  // compare down to code that is difficult to reassemble.
15376  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
15377  SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
15378 
15379  // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
15380  // value. If so, pass-through the AND to get to the intrinsic.
15381  if (LHS.getOpcode() == ISD::AND &&
15382  LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15383  cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
15384  Intrinsic::loop_decrement &&
15385  isa<ConstantSDNode>(LHS.getOperand(1)) &&
15386  !isNullConstant(LHS.getOperand(1)))
15387  LHS = LHS.getOperand(0);
15388 
15389  if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15390  cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
15391  Intrinsic::loop_decrement &&
15392  isa<ConstantSDNode>(RHS)) {
15393  assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
15394  "Counter decrement comparison is not EQ or NE");
15395 
15396  unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
15397  bool isBDNZ = (CC == ISD::SETEQ && Val) ||
15398  (CC == ISD::SETNE && !Val);
15399 
15400  // We now need to make the intrinsic dead (it cannot be instruction
15401  // selected).
15402  DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
15403  assert(LHS.getNode()->hasOneUse() &&
15404  "Counter decrement has more than one use");
15405 
15406  return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
15407  N->getOperand(0), N->getOperand(4));
15408  }
15409 
15410  int CompareOpc;
15411  bool isDot;
15412 
15413  if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15414  isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
15415  getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
15416  assert(isDot && "Can't compare against a vector result!");
15417 
15418  // If this is a comparison against something other than 0/1, then we know
15419  // that the condition is never/always true.
15420  unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
15421  if (Val != 0 && Val != 1) {
15422  if (CC == ISD::SETEQ) // Cond never true, remove branch.
15423  return N->getOperand(0);
15424  // Always !=, turn it into an unconditional branch.
15425  return DAG.getNode(ISD::BR, dl, MVT::Other,
15426  N->getOperand(0), N->getOperand(4));
15427  }
15428 
15429  bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
15430 
15431  // Create the PPCISD altivec 'dot' comparison node.
15432  SDValue Ops[] = {
15433  LHS.getOperand(2), // LHS of compare
15434  LHS.getOperand(3), // RHS of compare
15435  DAG.getConstant(CompareOpc, dl, MVT::i32)
15436  };
15437  EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
15438  SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
15439 
15440  // Unpack the result based on how the target uses it.
15441  PPC::Predicate CompOpc;
15442  switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
15443  default: // Can't happen, don't crash on invalid number though.
15444  case 0: // Branch on the value of the EQ bit of CR6.
15445  CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
15446  break;
15447  case 1: // Branch on the inverted value of the EQ bit of CR6.
15448  CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
15449  break;
15450  case 2: // Branch on the value of the LT bit of CR6.
15451  CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
15452  break;
15453  case 3: // Branch on the inverted value of the LT bit of CR6.
15454  CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
15455  break;
15456  }
15457 
15458  return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
15459  DAG.getConstant(CompOpc, dl, MVT::i32),
15460  DAG.getRegister(PPC::CR6, MVT::i32),
15461  N->getOperand(4), CompNode.getValue(1));
15462  }
15463  break;
15464  }
15465  case ISD::BUILD_VECTOR:
15466  return DAGCombineBuildVector(N, DCI);
15467  case ISD::ABS:
15468  return combineABS(N, DCI);
15469  case ISD::VSELECT:
15470  return combineVSelect(N, DCI);
15471  }
15472 
15473  return SDValue();
15474 }
15475 
15476 SDValue
15477 PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
15478  SelectionDAG &DAG,
15479  SmallVectorImpl<SDNode *> &Created) const {
15480  // fold (sdiv X, pow2)
15481  EVT VT = N->getValueType(0);
15482  if (VT == MVT::i64 && !Subtarget.isPPC64())
15483  return SDValue();
15484  if ((VT != MVT::i32 && VT != MVT::i64) ||
15485  !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
15486  return SDValue();
15487 
15488  SDLoc DL(N);
15489  SDValue N0 = N->getOperand(0);
15490 
15491  bool IsNegPow2 = (-Divisor).isPowerOf2();
15492  unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
15493  SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
15494 
15495  SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
15496  Created.push_back(Op.getNode());
15497 
15498  if (IsNegPow2) {
15499  Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
15500  Created.push_back(Op.getNode());
15501  }
15502 
15503  return Op;
15504 }
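// Rough example of the intent above (actual instruction selection may vary):
// for a 32-bit "x / 8", the SRA_ADDZE node is expected to become roughly
//   srawi r3, r3, 3
//   addze r3, r3
// with an extra negate appended when the divisor is a negative power of two.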
15505 
15506 //===----------------------------------------------------------------------===//
15507 // Inline Assembly Support
15508 //===----------------------------------------------------------------------===//
15509 
15510 void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
15511  KnownBits &Known,
15512  const APInt &DemandedElts,
15513  const SelectionDAG &DAG,
15514  unsigned Depth) const {
15515  Known.resetAll();
15516  switch (Op.getOpcode()) {
15517  default: break;
15518  case PPCISD::LBRX: {
15519  // lhbrx is known to have the top bits cleared out.
15520  if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
15521  Known.Zero = 0xFFFF0000;
15522  break;
15523  }
15524  case ISD::INTRINSIC_WO_CHAIN: {
15525  switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
15526  default: break;
15527  case Intrinsic::ppc_altivec_vcmpbfp_p:
15528  case Intrinsic::ppc_altivec_vcmpeqfp_p:
15529  case Intrinsic::ppc_altivec_vcmpequb_p:
15530  case Intrinsic::ppc_altivec_vcmpequh_p:
15531  case Intrinsic::ppc_altivec_vcmpequw_p:
15532  case Intrinsic::ppc_altivec_vcmpequd_p:
15533  case Intrinsic::ppc_altivec_vcmpequq_p:
15534  case Intrinsic::ppc_altivec_vcmpgefp_p:
15535  case Intrinsic::ppc_altivec_vcmpgtfp_p:
15536  case Intrinsic::ppc_altivec_vcmpgtsb_p:
15537  case Intrinsic::ppc_altivec_vcmpgtsh_p:
15538  case Intrinsic::ppc_altivec_vcmpgtsw_p:
15539  case Intrinsic::ppc_altivec_vcmpgtsd_p:
15540  case Intrinsic::ppc_altivec_vcmpgtsq_p:
15541  case Intrinsic::ppc_altivec_vcmpgtub_p:
15542  case Intrinsic::ppc_altivec_vcmpgtuh_p:
15543  case Intrinsic::ppc_altivec_vcmpgtuw_p:
15544  case Intrinsic::ppc_altivec_vcmpgtud_p:
15545  case Intrinsic::ppc_altivec_vcmpgtuq_p:
15546  Known.Zero = ~1U; // All bits but the low one are known to be zero.
15547  break;
15548  }
15549  }
15550  }
15551 }
15552 
15553 Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
15554  switch (Subtarget.getCPUDirective()) {
15555  default: break;
15556  case PPC::DIR_970:
15557  case PPC::DIR_PWR4:
15558  case PPC::DIR_PWR5:
15559  case PPC::DIR_PWR5X:
15560  case PPC::DIR_PWR6:
15561  case PPC::DIR_PWR6X:
15562  case PPC::DIR_PWR7:
15563  case PPC::DIR_PWR8:
15564  case PPC::DIR_PWR9:
15565  case PPC::DIR_PWR10:
15566  case PPC::DIR_PWR_FUTURE: {
15567  if (!ML)
15568  break;
15569 
15570  if (!DisableInnermostLoopAlign32) {
15571  // If the nested loop is an innermost loop, prefer a 32-byte alignment,
15572  // so that we can decrease cache misses and branch-prediction misses.
15573  // Actual alignment of the loop will depend on the hotness check and other
15574  // logic in alignBlocks.
15575  if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
15576  return Align(32);
15577  }
15578 
15579  const PPCInstrInfo *TII = Subtarget.getInstrInfo();
15580 
15581  // For small loops (between 5 and 8 instructions), align to a 32-byte
15582  // boundary so that the entire loop fits in one instruction-cache line.
15583  uint64_t LoopSize = 0;
15584  for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
15585  for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
15586  LoopSize += TII->getInstSizeInBytes(*J);
15587  if (LoopSize > 32)
15588  break;
15589  }
15590 
15591  if (LoopSize > 16 && LoopSize <= 32)
15592  return Align(32);
15593 
15594  break;
15595  }
15596  }
15597 
15598  return TargetLowering::getPrefLoopAlignment(ML);
15599 }
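// For example, a hot innermost loop of six 4-byte instructions (24 bytes)
// falls into the (16, 32] size range checked above and is given 32-byte
// alignment so it can sit in a single instruction-cache line; this is only a
// preference, and the final alignment is still decided in alignBlocks.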
15600 
15601 /// getConstraintType - Given a constraint, return the type of
15602 /// constraint it is for this target.
15603 PPCTargetLowering::ConstraintType
15604 PPCTargetLowering::getConstraintType(StringRef Constraint) const {
15605  if (Constraint.size() == 1) {
15606  switch (Constraint[0]) {
15607  default: break;
15608  case 'b':
15609  case 'r':
15610  case 'f':
15611  case 'd':
15612  case 'v':
15613  case 'y':
15614  return C_RegisterClass;
15615  case 'Z':
15616  // FIXME: While Z does indicate a memory constraint, it specifically
15617  // indicates an r+r address (used in conjunction with the 'y' modifier
15618  // in the replacement string). Currently, we're forcing the base
15619  // register to be r0 in the asm printer (which is interpreted as zero)
15620  // and forming the complete address in the second register. This is
15621  // suboptimal.
15622  return C_Memory;
15623  }
15624  } else if (Constraint == "wc") { // individual CR bits.
15625  return C_RegisterClass;
15626  } else if (Constraint == "wa" || Constraint == "wd" ||
15627  Constraint == "wf" || Constraint == "ws" ||
15628  Constraint == "wi" || Constraint == "ww") {
15629  return C_RegisterClass; // VSX registers.
15630  }
15631  return TargetLowering::getConstraintType(Constraint);
15632 }
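// Illustrative use (assumed, not taken from this file): in C source such as
//   asm ("add %0,%1,%2" : "=r"(d) : "r"(a), "b"(b));
// both "r" and "b" are classified as C_RegisterClass by the code above, while
// a "Z" memory operand would be classified as C_Memory.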
15633 
15634 /// Examine constraint type and operand type and determine a weight value.
15635 /// This object must already have been set up with the operand type
15636 /// and the current alternative constraint selected.
15637 TargetLowering::ConstraintWeight
15638 PPCTargetLowering::getSingleConstraintMatchWeight(
15639  AsmOperandInfo &info, const char *constraint) const {
15640  ConstraintWeight weight = CW_Invalid;
15641  Value *CallOperandVal = info.CallOperandVal;
15642  // If we don't have a value, we can't do a match,
15643  // but allow it at the lowest weight.
15644  if (!CallOperandVal)
15645  return CW_Default;
15646  Type *type = CallOperandVal->getType();
15647 
15648  // Look at the constraint type.
15649  if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
15650  return CW_Register; // an individual CR bit.
15651  else if ((StringRef(constraint) == "wa" ||
15652  StringRef(constraint) == "wd" ||
15653  StringRef(constraint) == "wf") &&
15654  type->isVectorTy())
15655  return CW_Register;
15656  else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
15657  return CW_Register; // just holds 64-bit integer data.
15658  else if (StringRef(constraint) == "ws" && type->isDoubleTy())
15659  return CW_Register;
15660  else if (StringRef(constraint) == "ww" && type->isFloatTy())
15661  return CW_Register;
15662 
15663  switch (*constraint) {
15664  default:
15665  weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
15666  break;
15667  case 'b':
15668  if (type->isIntegerTy())
15669  weight = CW_Register;
15670  break;
15671  case 'f':
15672  if (type->isFloatTy())
15673  weight = CW_Register;
15674  break;
15675  case 'd':
15676  if (type->isDoubleTy())
15677  weight = CW_Register;
15678  break;
15679  case 'v':
15680  if (type->isVectorTy())
15681  weight = CW_Register;
15682  break;
15683  case 'y':
15684  weight = CW_Register;
15685  break;
15686  case 'Z':
15687  weight = CW_Memory;
15688  break;
15689  }
15690  return weight;
15691 }
15692 
15693 std::pair<unsigned, const TargetRegisterClass *>
15694 PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
15695  StringRef Constraint,
15696  MVT VT) const {
15697  if (Constraint.size() == 1) {
15698  // GCC RS6000 Constraint Letters
15699  switch (Constraint[0]) {
15700  case 'b': // R1-R31
15701  if (VT == MVT::i64 && Subtarget.isPPC64())
15702  return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
15703  return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
15704  case 'r': // R0-R31
15705  if (VT == MVT::i64 && Subtarget.isPPC64())
15706  return std::make_pair(0U, &PPC::G8RCRegClass);
15707  return std::make_pair(0U, &PPC::GPRCRegClass);
15708  // 'd' and 'f' constraints are both defined to be "the floating point
15709  // registers", where one is for 32-bit and the other for 64-bit. We don't
15710  // really care overly much here so just give them all the same reg classes.
15711  case 'd':
15712  case 'f':
15713  if (Subtarget.hasSPE()) {
15714  if (VT == MVT::f32 || VT == MVT::i32)
15715  return std::make_pair(0U, &PPC::GPRCRegClass);
15716  if (VT == MVT::f64 || VT == MVT::i64)
15717  return std::make_pair(0U, &PPC::SPERCRegClass);
15718  } else {
15719  if (VT == MVT::f32 || VT == MVT::i32)
15720  return std::make_pair(0U, &PPC::F4RCRegClass);
15721  if (VT == MVT::f64 || VT == MVT::i64)
15722  return std::make_pair(0U, &PPC::F8RCRegClass);
15723  }
15724  break;
15725  case 'v':
15726  if (Subtarget.hasAltivec())
15727  return std::make_pair(0U, &PPC::VRRCRegClass);
15728  break;
15729  case 'y': // crrc
15730  return std::make_pair(0U, &PPC::CRRCRegClass);
15731  }
15732  } else if (Constraint == "wc" && Subtarget.useCRBits()) {
15733  // An individual CR bit.
15734  return std::make_pair(0U, &PPC::CRBITRCRegClass);
15735  } else if ((Constraint == "wa" || Constraint == "wd" ||
15736  Constraint == "wf" || Constraint == "wi") &&
15737  Subtarget.hasVSX()) {
15738  // A VSX register for either a scalar (FP) or vector. There is no
15739  // support for single precision scalars on subtargets prior to Power8.
15740  if (VT.isVector())
15741  return std::make_pair(0U, &PPC::VSRCRegClass);
15742  if (VT == MVT::f32 && Subtarget.hasP8Vector())
15743  return std::make_pair(0U, &PPC::VSSRCRegClass);
15744  return std::make_pair(0U, &PPC::VSFRCRegClass);
15745  } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
15746  if (VT == MVT::f32 && Subtarget.hasP8Vector())
15747  return std::make_pair(0U, &PPC::VSSRCRegClass);
15748  else
15749  return std::make_pair(0U, &PPC::VSFRCRegClass);
15750  } else if (Constraint == "lr") {
15751  if (VT == MVT::i64)
15752  return std::make_pair(0U, &PPC::LR8RCRegClass);
15753  else
15754  return std::make_pair(0U, &PPC::LRRCRegClass);
15755  }
15756 
15757  // Handle special cases of physical registers that are not properly handled
15758  // by the base class.
15759  if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
15760  // If we name a VSX register, we can't defer to the base class because it
15761  // will not recognize the correct register (their names will be VSL{0-31}
15762  // and V{0-31} so they won't match). So we match them here.
15763  if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
15764  int VSNum = atoi(Constraint.data() + 3);
15765  assert(VSNum >= 0 && VSNum <= 63 &&
15766  "Attempted to access a vsr out of range");
15767  if (VSNum < 32)
15768  return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
15769  return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
15770  }
15771 
15772  // For float registers, we can't defer to the base class as it will match
15773  // the SPILLTOVSRRC class.
15774  if (Constraint.size() > 3 && Constraint[1] == 'f') {
15775  int RegNum = atoi(Constraint.data() + 2);
15776  if (RegNum > 31 || RegNum < 0)
15777  report_fatal_error("Invalid floating point register number");
15778  if (VT == MVT::f32 || VT == MVT::i32)
15779  return Subtarget.hasSPE()
15780  ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
15781  : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
15782  if (VT == MVT::f64 || VT == MVT::i64)
15783  return Subtarget.hasSPE()
15784  ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
15785  : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
15786  }
15787  }
15788 
15789  std::pair<unsigned, const TargetRegisterClass *> R =
15790  TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
15791 
15792  // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
15793  // (which we call X[0-9]+). If a 64-bit value has been requested, and a
15794  // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
15795  // register.
15796  // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
15797  // the AsmName field from *RegisterInfo.td, then this would not be necessary.
15798  if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
15799  PPC::GPRCRegClass.contains(R.first))
15800  return std::make_pair(TRI->getMatchingSuperReg(R.first,
15801  PPC::sub_32, &PPC::G8RCRegClass),
15802  &PPC::G8RCRegClass);
15803 
15804  // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
15805  if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
15806  R.first = PPC::CR0;
15807  R.second = &PPC::CRRCRegClass;
15808  }
15809  // FIXME: This warning should ideally be emitted in the front end.
15810  const auto &TM = getTargetMachine();
15811  if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
15812  if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
15813  (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
15814  (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
15815  errs() << "warning: vector registers 20 to 32 are reserved in the "
15816  "default AIX AltiVec ABI and cannot be used\n";
15817  }
15818 
15819  return R;
15820 }
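// Sketch of the VSX naming special case handled above: an explicit operand
// such as "{vs34}" cannot be resolved by the base class, so it is mapped here
// to V2 (vs32 + 2) in VSRCRegClass, while "{vs3}" maps to VSL3.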
15821 
15822 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
15823 /// vector. If it is invalid, don't add anything to Ops.
15824 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
15825  std::string &Constraint,
15826  std::vector<SDValue>&Ops,
15827  SelectionDAG &DAG) const {
15828  SDValue Result;
15829 
15830  // Only support length 1 constraints.
15831  if (Constraint.length() > 1) return;
15832 
15833  char Letter = Constraint[0];
15834  switch (Letter) {
15835  default: break;
15836  case 'I':
15837  case 'J':
15838  case 'K':
15839  case 'L':
15840  case 'M':
15841  case 'N':
15842  case 'O':
15843  case 'P': {
15844  ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
15845  if (!CST) return; // Must be an immediate to match.
15846  SDLoc dl(Op);
15847  int64_t Value = CST->getSExtValue();
15848  EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
15849  // numbers are printed as such.
15850  switch (Letter) {
15851  default: llvm_unreachable("Unknown constraint letter!");
15852  case 'I': // "I" is a signed 16-bit constant.
15853  if (isInt<16>(Value))
15854  Result = DAG.getTargetConstant(Value, dl, TCVT);
15855  break;
15856  case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
15857  if (isShiftedUInt<16, 16>(Value))
15858  Result = DAG.getTargetConstant(Value, dl, TCVT);
15859  break;
15860  case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
15861  if (isShiftedInt<16, 16>(Value))
15862  Result = DAG.getTargetConstant(Value, dl, TCVT);
15863  break;
15864  case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
15865  if (isUInt<16>(Value))
15866  Result = DAG.getTargetConstant(Value, dl, TCVT);
15867  break;
15868  case 'M': // "M" is a constant that is greater than 31.
15869  if (Value > 31)
15870  Result = DAG.getTargetConstant(Value, dl, TCVT);
15871  break;
15872  case 'N': // "N" is a positive constant that is an exact power of two.
15873  if (Value > 0 && isPowerOf2_64(Value))
15874  Result = DAG.getTargetConstant(Value, dl, TCVT);
15875  break;
15876  case 'O': // "O" is the constant zero.
15877  if (Value == 0)
15878  Result = DAG.getTargetConstant(Value, dl, TCVT);
15879  break;
15880  case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
15881  if (isInt<16>(-Value))
15882  Result = DAG.getTargetConstant(Value, dl, TCVT);
15883  break;
15884  }
15885  break;
15886  }
15887  }
15888 
15889  if (Result.getNode()) {
15890  Ops.push_back(Result);
15891  return;
15892  }
15893 
15894  // Handle standard constraint letters.
15895  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
15896 }
15897 
15898 // isLegalAddressingMode - Return true if the addressing mode represented
15899 // by AM is legal for this target, for a load/store of the specified type.
15900 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
15901  const AddrMode &AM, Type *Ty,
15902  unsigned AS,
15903  Instruction *I) const {
15904  // Vector type r+i form is supported since power9 as DQ form. We don't check
15905  // the offset matching DQ form requirement(off % 16 == 0), because on PowerPC,
15906  // imm form is preferred and the offset can be adjusted to use imm form later
15907  // in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it uses min and
15908  // max offset to check legal addressing mode, we should be a little aggressive
15909  // to contain other offsets for that LSRUse.
15910  if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
15911  return false;
15912 
15913  // PPC allows a sign-extended 16-bit immediate field.
15914  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
15915  return false;
15916 
15917  // No global is ever allowed as a base.
15918  if (AM.BaseGV)
15919  return false;
15920 
15921  // PPC only supports r+r addressing.
15922  switch (AM.Scale) {
15923  case 0: // "r+i" or just "i", depending on HasBaseReg.
15924  break;
15925  case 1:
15926  if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
15927  return false;
15928  // Otherwise we have r+r or r+i.
15929  break;
15930  case 2:
15931  if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
15932  return false;
15933  // Allow 2*r as r+r.
15934  break;
15935  default:
15936  // No other scales are supported.
15937  return false;
15938  }
15939 
15940  return true;
15941 }
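// For instance, "base + 32760" (r+i with a signed 16-bit displacement) and
// "base + index" (r+r, Scale == 1) pass the checks above, whereas
// "base + index + 8" and any scaled index other than plain 2*r are rejected.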
15942 
15943 SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
15944  SelectionDAG &DAG) const {
15945  MachineFunction &MF = DAG.getMachineFunction();
15946  MachineFrameInfo &MFI = MF.getFrameInfo();
15947  MFI.setReturnAddressIsTaken(true);
15948 
15949  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
15950  return SDValue();
15951 
15952  SDLoc dl(Op);
15953  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
15954 
15955  // Make sure the function does not optimize away the store of the RA to
15956  // the stack.
15957  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
15958  FuncInfo->setLRStoreRequired();
15959  bool isPPC64 = Subtarget.isPPC64();
15960  auto PtrVT = getPointerTy(MF.getDataLayout());
15961 
15962  if (Depth > 0) {
15963  SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
15964  SDValue Offset =
15965  DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
15966  isPPC64 ? MVT::i64 : MVT::i32);
15967  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
15968  DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
15969  MachinePointerInfo());
15970  }
15971 
15972  // Just load the return address off the stack.
15973  SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
15974  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
15975  MachinePointerInfo());
15976 }
15977 
15978 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
15979  SelectionDAG &DAG) const {
15980  SDLoc dl(Op);
15981  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
15982 
15983  MachineFunction &MF = DAG.getMachineFunction();
15984  MachineFrameInfo &MFI = MF.getFrameInfo();
15985  MFI.setFrameAddressIsTaken(true);
15986 
15987  EVT PtrVT = getPointerTy(MF.getDataLayout());
15988  bool isPPC64 = PtrVT == MVT::i64;
15989 
15990  // Naked functions never have a frame pointer, and so we use r1. For all
15991  // other functions, this decision must be delayed until during PEI.
15992  unsigned FrameReg;
15993  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
15994  FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
15995  else
15996  FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
15997 
15998  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
15999  PtrVT);
16000  while (Depth--)
16001  FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
16002  FrameAddr, MachinePointerInfo());
16003  return FrameAddr;
16004 }
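// Illustrative mapping (assumed): __builtin_return_address(0) takes the
// "load the return address off the stack" path in LowerRETURNADDR above,
// while __builtin_frame_address(1) produces one extra load through the frame
// pointer chain established here.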
16005 
16006 // FIXME? Maybe this could be a TableGen attribute on some registers and
16007 // this table could be generated automatically from RegInfo.
16008 Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
16009  const MachineFunction &MF) const {
16010  bool isPPC64 = Subtarget.isPPC64();
16011 
16012  bool is64Bit = isPPC64 && VT == LLT::scalar(64);
16013  if (!is64Bit && VT != LLT::scalar(32))
16014  report_fatal_error("Invalid register global variable type");
16015 
16016  Register Reg = StringSwitch<Register>(RegName)
16017  .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
16018  .Case("r2", isPPC64 ? Register() : PPC::R2)
16019  .Case("r13", (is64Bit ? PPC::X13 : PPC::R13))
16020  .Default(Register());
16021 
16022  if (Reg)
16023  return Reg;
16024  report_fatal_error("Invalid register name global variable");
16025 }
16026 
16027 bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
16028  // The 32-bit SVR4 ABI accesses everything as got-indirect.
16029  if (Subtarget.is32BitELFABI())
16030  return true;
16031 
16032  // AIX accesses everything indirectly through the TOC, which is similar to
16033  // the GOT.
16034  if (Subtarget.isAIXABI())
16035  return true;
16036 
16037  CodeModel::Model CModel = getTargetMachine().getCodeModel();
16038  // If it is small or large code model, module locals are accessed
16039  // indirectly by loading their address from .toc/.got.
16040  if (CModel == CodeModel::Small || CModel == CodeModel::Large)
16041  return true;
16042 
16043  // JumpTable and BlockAddress are accessed as got-indirect.
16044  if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
16045  return true;
16046 
16047  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
16048  return Subtarget.isGVIndirectSymbol(G->getGlobal());
16049 
16050  return false;
16051 }
16052 
16053 bool
16054 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
16055  // The PowerPC target isn't yet aware of offsets.
16056  return false;
16057 }
16058 
16059 bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
16060  const CallInst &I,
16061  MachineFunction &MF,
16062  unsigned Intrinsic) const {
16063  switch (Intrinsic) {
16064  case Intrinsic::ppc_atomicrmw_xchg_i128:
16065  case Intrinsic::ppc_atomicrmw_add_i128:
16066  case Intrinsic::ppc_atomicrmw_sub_i128:
16067  case Intrinsic::ppc_atomicrmw_nand_i128:
16068  case Intrinsic::ppc_atomicrmw_and_i128:
16069  case Intrinsic::ppc_atomicrmw_or_i128:
16070  case Intrinsic::ppc_atomicrmw_xor_i128:
16071  case Intrinsic::ppc_cmpxchg_i128:
16072  Info.opc = ISD::INTRINSIC_W_CHAIN;
16073  Info.memVT = MVT::i128;
16074  Info.ptrVal = I.getArgOperand(0);
16075  Info.offset = 0;
16076  Info.align = Align(16);
16077  Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
16078  MachineMemOperand::MOVolatile;
16079  return true;
16080  case Intrinsic::ppc_altivec_lvx:
16081  case Intrinsic::ppc_altivec_lvxl:
16082  case Intrinsic::ppc_altivec_lvebx:
16083  case Intrinsic::ppc_altivec_lvehx:
16084  case Intrinsic::ppc_altivec_lvewx:
16085  case Intrinsic::ppc_vsx_lxvd2x:
16086  case Intrinsic::ppc_vsx_lxvw4x:
16087  case Intrinsic::ppc_vsx_lxvd2x_be:
16088  case Intrinsic::ppc_vsx_lxvw4x_be:
16089  case Intrinsic::ppc_vsx_lxvl:
16090  case Intrinsic::ppc_vsx_lxvll: {
16091  EVT VT;
16092  switch (Intrinsic) {
16093  case Intrinsic::ppc_altivec_lvebx:
16094  VT = MVT::i8;
16095  break;
16096  case Intrinsic::ppc_altivec_lvehx:
16097  VT = MVT::i16;
16098  break;
16099  case Intrinsic::ppc_altivec_lvewx:
16100  VT = MVT::i32;
16101  break;
16102  case Intrinsic::ppc_vsx_lxvd2x:
16103  case Intrinsic::ppc_vsx_lxvd2x_be:
16104  VT = MVT::v2f64;
16105  break;
16106  default:
16107  VT = MVT::v4i32;
16108  break;
16109  }
16110 
16111  Info.opc = ISD::INTRINSIC_W_CHAIN;
16112  Info.memVT = VT;
16113  Info.ptrVal = I.getArgOperand(0);
16114  Info.offset = -VT.getStoreSize()+1;
16115  Info.size = 2*VT.getStoreSize()-1;
16116  Info.align = Align(1);
16117  Info.flags = MachineMemOperand::MOLoad;
16118  return true;
16119  }
16120  case Intrinsic::ppc_altivec_stvx:
16121  case Intrinsic::ppc_altivec_stvxl:
16122  case Intrinsic::ppc_altivec_stvebx:
16123  case Intrinsic::ppc_altivec_stvehx:
16124  case Intrinsic::ppc_altivec_stvewx:
16125  case Intrinsic::ppc_vsx_stxvd2x:
16126  case Intrinsic::ppc_vsx_stxvw4x:
16127  case Intrinsic::ppc_vsx_stxvd2x_be:
16128  case Intrinsic::ppc_vsx_stxvw4x_be:
16129  case Intrinsic::ppc_vsx_stxvl:
16130  case Intrinsic::ppc_vsx_stxvll: {
16131  EVT VT;
16132  switch (Intrinsic) {
16133  case Intrinsic::ppc_altivec_stvebx:
16134  VT = MVT::i8;
16135  break;
16136  case Intrinsic::ppc_altivec_stvehx:
16137  VT = MVT::i16;
16138  break;
16139  case Intrinsic::ppc_altivec_stvewx:
16140  VT = MVT::i32;
16141  break;
16142  case Intrinsic::ppc_vsx_stxvd2x:
16143  case Intrinsic::ppc_vsx_stxvd2x_be:
16144  VT = MVT::v2f64;
16145  break;
16146  default:
16147  VT = MVT::v4i32;
16148  break;
16149  }
16150 
16151  Info.opc = ISD::INTRINSIC_VOID;
16152  Info.memVT = VT;
16153  Info.ptrVal = I.getArgOperand(1);
16154  Info.offset = -VT.getStoreSize()+1;
16155  Info.size = 2*VT.getStoreSize()-1;
16156  Info.align = Align(1);
16157  Info.flags = MachineMemOperand::MOStore;
16158  return true;
16159  }
16160  default:
16161  break;
16162  }
16163 
16164  return false;
16165 }
16166 
16167 /// It returns EVT::Other if the type should be determined using generic
16168 /// target-independent logic.
16169 EVT PPCTargetLowering::getOptimalMemOpType(
16170  const MemOp &Op, const AttributeList &FuncAttributes) const {
16171  if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
16172  // We should use Altivec/VSX loads and stores when available. For unaligned
16173  // addresses, unaligned VSX loads are only fast starting with the P8.
16174  if (Subtarget.hasAltivec() && Op.size() >= 16 &&
16175  (Op.isAligned(Align(16)) ||
16176  ((Op.isMemset() && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
16177  return MVT::v4i32;
16178  }
16179 
16180  if (Subtarget.isPPC64()) {
16181  return MVT::i64;
16182  }
16183 
16184  return MVT::i32;
16185 }
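// Example of the effect (a sketch, not a guarantee): a 32-byte memcpy with
// 16-byte-aligned operands on a VSX subtarget is expanded with two v4i32
// loads/stores, while the same copy on a 32-bit subtarget without Altivec
// falls back to i32-sized chunks.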
16186 
16187 /// Returns true if it is beneficial to convert a load of a constant
16188 /// to just the constant itself.
16189 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
16190  Type *Ty) const {
16191  assert(Ty->isIntegerTy());
16192 
16193  unsigned BitSize = Ty->getPrimitiveSizeInBits();
16194  return !(BitSize == 0 || BitSize > 64);
16195 }
16196 
16197 bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
16198  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
16199  return false;
16200  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
16201  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
16202  return NumBits1 == 64 && NumBits2 == 32;
16203 }
16204 
16205 bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
16206  if (!VT1.isInteger() || !VT2.isInteger())
16207  return false;
16208  unsigned NumBits1 = VT1.getSizeInBits();
16209  unsigned NumBits2 = VT2.getSizeInBits();
16210  return NumBits1 == 64 && NumBits2 == 32;
16211 }
16212 
16213 bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
16214  // Generally speaking, zexts are not free, but they are free when they can be
16215  // folded with other operations.
16216  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
16217  EVT MemVT = LD->getMemoryVT();
16218  if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
16219  (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
16220  (LD->getExtensionType() == ISD::NON_EXTLOAD ||
16221  LD->getExtensionType() == ISD::ZEXTLOAD))
16222  return true;
16223  }
16224 
16225  // FIXME: Add other cases...
16226  // - 32-bit shifts with a zext to i64
16227  // - zext after ctlz, bswap, etc.
16228  // - zext after and by a constant mask
16229 
16230  return TargetLowering::isZExtFree(Val, VT2);
16231 }
16232 
16233 bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
16234  assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
16235  "invalid fpext types");
16236  // Extending to float128 is not free.
16237  if (DestVT == MVT::f128)
16238  return false;
16239  return true;
16240 }
16241 
16243  return isInt<16>(Imm) || isUInt<16>(Imm);
16244 }
16245 
16247  return isInt<16>(Imm) || isUInt<16>(Imm);
16248 }
16249 
16252  bool *Fast) const {
16253  if (DisablePPCUnaligned)
16254  return false;
16255 
16256  // PowerPC supports unaligned memory access for simple non-vector types.
16257  // Although accessing unaligned addresses is not as efficient as accessing
16258  // aligned addresses, it is generally more efficient than manual expansion,
16259  // and generally only traps for software emulation when crossing page
16260  // boundaries.
16261 
16262  if (!VT.isSimple())
16263  return false;
16264 
16265  if (VT.isFloatingPoint() && !VT.isVector() &&
16266  !Subtarget.allowsUnalignedFPAccess())
16267  return false;
16268 
16269  if (VT.getSimpleVT().isVector()) {
16270  if (Subtarget.hasVSX()) {
16271  if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
16272  VT != MVT::v4f32 && VT != MVT::v4i32)
16273  return false;
16274  } else {
16275  return false;
16276  }
16277  }
16278 
16279  if (VT == MVT::ppcf128)
16280  return false;
16281 
16282  if (Fast)
16283  *Fast = true;
16284 
16285  return true;
16286 }
16287 
16288 bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
16289  SDValue C) const {
16290  // Check integral scalar types.
16291  if (!VT.isScalarInteger())
16292  return false;
16293  if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
16294  if (!ConstNode->getAPIntValue().isSignedIntN(64))
16295  return false;
16296  // This transformation will generate >= 2 operations. But the following
16297  // cases will generate <= 2 instructions during ISEL. So exclude them.
16298  // 1. If the constant multiplier fits in 16 bits, it can be handled by one
16299  // HW instruction, i.e. MULLI.
16300  // 2. If the multiplier still fits in 16 bits after shifting out its trailing
16301  // zeros, only one extra shift is needed, i.e. MULLI and RLDICR.
16302  int64_t Imm = ConstNode->getSExtValue();
16303  unsigned Shift = countTrailingZeros<uint64_t>(Imm);
16304  Imm >>= Shift;
16305  if (isInt<16>(Imm))
16306  return false;
16307  uint64_t UImm = static_cast<uint64_t>(Imm);
16308  if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
16309  isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
16310  return true;
16311  }
16312  return false;
16313 }
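// Worked example for the check above (illustrative): a multiplier of
// 40 = 5 << 3 still fits in 16 bits once its trailing zeros are stripped, so
// the hook returns false and MULLI (+ RLDICR) is kept; a multiplier of
// 65537 = 2^16 + 1 does not fit, but is one away from a power of two, so the
// hook returns true and the multiply can be decomposed into a shift and add.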
16314 
16315 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
16316  EVT VT) const {
16317  return isFMAFasterThanFMulAndFAdd(
16318  MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));
16319 }
16320 
16321 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
16322  Type *Ty) const {
16323  switch (Ty->getScalarType()->getTypeID()) {
16324  case Type::FloatTyID:
16325  case Type::DoubleTyID:
16326  return true;
16327  case Type::FP128TyID:
16328  return Subtarget.hasP9Vector();
16329  default:
16330  return false;
16331  }
16332 }
16333 
16334 // FIXME: add more patterns which are not profitable to hoist.
16335 bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
16336  if (!I->hasOneUse())
16337  return true;
16338 
16339  Instruction *User = I->user_back();
16340  assert(User && "A single use instruction with no uses.");
16341 
16342  switch (I->getOpcode()) {
16343  case Instruction::FMul: {
16344  // Don't break FMA, PowerPC prefers FMA.
16345  if (User->getOpcode() != Instruction::FSub &&
16346  User->getOpcode() != Instruction::FAdd)
16347  return true;
16348 
16349  const TargetOptions &Options = getTargetMachine().Options;
16350  const Function *F = I->getFunction();
16351  const DataLayout &DL = F->getParent()->getDataLayout();
16352  Type *Ty = User->getOperand(0)->getType();
16353 
16354  return !(
16355  isFMAFasterThanFMulAndFAdd(*F, Ty) &&
16356  isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
16357  (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
16358  }
16359  case Instruction::Load: {
16360  // Don't break "store (load float*)" pattern, this pattern will be combined
16361  // to "store (load int32)" in later InstCombine pass. See function
16362  // combineLoadToOperationType. On PowerPC, loading a floating-point value
16363  // takes more cycles than loading a 32-bit integer.
16364  LoadInst *LI = cast<LoadInst>(I);
16365  // For the loads that combineLoadToOperationType does nothing, like
16366  // ordered load, it should be profitable to hoist them.
16367  // For swifterror load, it can only be used for pointer to pointer type, so
16368  // later type check should get rid of this case.
16369  if (!LI->isUnordered())
16370  return true;
16371 
16372  if (User->getOpcode() != Instruction::Store)
16373  return true;
16374 
16375  if (I->getType()->getTypeID() != Type::FloatTyID)
16376  return true;
16377 
16378  return false;
16379  }
16380  default:
16381  return true;
16382  }
16383  return true;
16384 }
16385 
16386 const MCPhysReg *
16387 PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
16388  // LR is a callee-save register, but we must treat it as clobbered by any call
16389  // site. Hence we include LR in the scratch registers, which are in turn added
16390  // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
16391  // to CTR, which is used by any indirect call.
16392  static const MCPhysReg ScratchRegs[] = {
16393  PPC::X12, PPC::LR8, PPC::CTR8, 0
16394  };
16395 
16396  return ScratchRegs;
16397 }
16398 
16399 Register PPCTargetLowering::getExceptionPointerRegister(
16400  const Constant *PersonalityFn) const {
16401  return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
16402 }
16403 
16404 Register PPCTargetLowering::getExceptionSelectorRegister(
16405  const Constant *PersonalityFn) const {
16406  return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
16407 }
16408 
16409 bool
16410 PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
16411  EVT VT , unsigned DefinedValues) const {
16412  if (VT == MVT::v2i64)
16413  return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
16414 
16415  if (Subtarget.hasVSX())
16416  return true;
16417 
16418  return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
16419 }
16420 
16421 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
16422  if (DisableILPPref || Subtarget.enableMachineScheduler())
16423  return TargetLowering::getSchedulingPreference(N);
16424 
16425  return Sched::ILP;
16426 }
16427 
16428 // Create a fast isel object.
16429 FastISel *
16431  const TargetLibraryInfo *LibInfo) const {
16432  return PPC::createFastISel(FuncInfo, LibInfo);
16433 }
16434 
16435 // 'Inverted' means the FMA opcode after negating one multiplicand.
16436 // For example, (fma -a b c) = (fnmsub a b c)
16437 static unsigned invertFMAOpcode(unsigned Opc) {
16438  switch (Opc) {
16439  default:
16440  llvm_unreachable("Invalid FMA opcode for PowerPC!");
16441  case ISD::FMA:
16442  return PPCISD::FNMSUB;
16443  case PPCISD::FNMSUB:
16444  return ISD::FMA;
16445  }
16446 }
16447 
16448 SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
16449  bool LegalOps, bool OptForSize,
16450  NegatibleCost &Cost,
16451  unsigned Depth) const {
16452  if (Depth > SelectionDAG::MaxRecursionDepth)
16453  return SDValue();
16454 
16455  unsigned Opc = Op.getOpcode();
16456  EVT VT = Op.getValueType();
16457  SDNodeFlags Flags = Op.getNode()->getFlags();
16458 
16459  switch (Opc) {
16460  case PPCISD::FNMSUB:
16461  if (!Op.hasOneUse() || !isTypeLegal(VT))
16462  break;
16463 
16464  const TargetOptions &Options = getTargetMachine().Options;
16465  SDValue N0 = Op.getOperand(0);
16466  SDValue N1 = Op.getOperand(1);
16467  SDValue N2 = Op.getOperand(2);
16468  SDLoc Loc(Op);
16469 
16470  NegatibleCost N2Cost = NegatibleCost::Expensive;
16471  SDValue NegN2 =
16472  getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
16473 
16474  if (!NegN2)
16475  return SDValue();
16476 
16477  // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
16478  // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
16479  // These transformations may change sign of zeroes. For example,
16480  // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
16481  if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
16482  // Try and choose the cheaper one to negate.
16483  NegatibleCost N0Cost = NegatibleCost::Expensive;
16484  SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
16485  N0Cost, Depth + 1);
16486 
16487  NegatibleCost N1Cost = NegatibleCost::Expensive;
16488  SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
16489  N1Cost, Depth + 1);
16490 
16491  if (NegN0 && N0Cost <= N1Cost) {
16492  Cost = std::min(N0Cost, N2Cost);
16493  return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
16494  } else if (NegN1) {
16495  Cost = std::min(N1Cost, N2Cost);
16496  return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
16497  }
16498  }
16499 
16500  // (fneg (fnmsub a b c)) => (fma a b (fneg c))
16501  if (isOperationLegal(ISD::FMA, VT)) {
16502  Cost = N2Cost;
16503  return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
16504  }
16505 
16506  break;
16507  }
16508 
16509  return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
16510  Cost, Depth);
16511 }
16512 
16513 // Override to enable LOAD_STACK_GUARD lowering on Linux.
16514 bool PPCTargetLowering::useLoadStackGuardNode() const {
16515  if (!Subtarget.isTargetLinux())
16516  return TargetLowering::useLoadStackGuardNode();
16517  return true;
16518 }
16519 
16520 // Override to disable global variable loading on Linux and insert AIX canary
16521 // word declaration.
16522 void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
16523  if (Subtarget.isAIXABI()) {
16524  M.getOrInsertGlobal(AIXSSPCanaryWordName,
16525  Type::getInt8PtrTy(M.getContext()));
16526  return;
16527  }
16528  if (!Subtarget.isTargetLinux())
16529  return TargetLowering::insertSSPDeclarations(M);
16530 }
16531 
16532 Value *PPCTargetLowering::getSDagStackGuard(const Module &M) const {
16533  if (Subtarget.isAIXABI())
16534  return M.getGlobalVariable(AIXSSPCanaryWordName);
16535  return TargetLowering::getSDagStackGuard(M);
16536 }
16537 
16538 bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
16539  bool ForCodeSize) const {
16540  if (!VT.isSimple() || !Subtarget.hasVSX())
16541  return false;
16542 
16543  switch(VT.getSimpleVT().SimpleTy) {
16544  default:
16545  // For FP types that are currently not supported by PPC backend, return
16546  // false. Examples: f16, f80.
16547  return false;
16548  case MVT::f32:
16549  case MVT::f64:
16550  if (Subtarget.hasPrefixInstrs()) {
16551  // We can materialize all immediates via XXSPLTI32DX and XXSPLTIDP.
16552  return true;
16553  }
16554  LLVM_FALLTHROUGH;
16555  case MVT::ppcf128:
16556  return Imm.isPosZero();
16557  }
16558 }
16559 
16560 // For vector shift operation op, fold
16561 // (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
16562 static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
16563  SelectionDAG &DAG) {
16564  SDValue N0 = N->getOperand(0);
16565  SDValue N1 = N->getOperand(1);
16566  EVT VT = N0.getValueType();
16567  unsigned OpSizeInBits = VT.getScalarSizeInBits();
16568  unsigned Opcode = N->getOpcode();
16569  unsigned TargetOpcode;
16570 
16571  switch (Opcode) {
16572  default:
16573  llvm_unreachable("Unexpected shift operation");
16574  case ISD::SHL:
16575  TargetOpcode = PPCISD::SHL;
16576  break;
16577  case ISD::SRL:
16578  TargetOpcode = PPCISD::SRL;
16579  break;
16580  case ISD::SRA:
16581  TargetOpcode = PPCISD::SRA;
16582  break;
16583  }
16584 
16585  if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
16586  N1->getOpcode() == ISD::AND)
16587  if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
16588  if (Mask->getZExtValue() == OpSizeInBits - 1)
16589  return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
16590 
16591  return SDValue();
16592 }
16593 
16594 SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
16595  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
16596  return Value;
16597 
16598  SDValue N0 = N->getOperand(0);
16599  ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
16600  if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
16601  N0.getOpcode() != ISD::SIGN_EXTEND ||
16602  N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
16603  N->getValueType(0) != MVT::i64)
16604  return SDValue();
16605 
16606  // We can't save an operation here if the value is already extended, and
16607  // the existing shift is easier to combine.
16608  SDValue ExtsSrc = N0.getOperand(0);
16609  if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
16610  ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
16611  return SDValue();
16612 
16613  SDLoc DL(N0);
16614  SDValue ShiftBy = SDValue(CN1, 0);
16615  // We want the shift amount to be i32 on the extswli, but the shift could
16616  // have an i64.
16617  if (ShiftBy.getValueType() == MVT::i64)
16618  ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
16619 
16620  return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
16621  ShiftBy);
16622 }
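// Sketch of the combine above (assuming ISA 3.0 and a 64-bit target): for
//   (shl (sign_extend i32:x to i64), 3)
// the sign-extension and shift are merged into a single EXTSWSLI node, which
// selects to extswsli instead of extsw followed by sldi.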
16623 
16624 SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
16625  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
16626  return Value;
16627 
16628  return SDValue();
16629 }
16630 
16631 SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
16632  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
16633  return Value;
16634 
16635  return SDValue();
16636 }
16637 
16638 // Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
16639 // Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
16640 // When C is zero, the equation (addi Z, -C) can be simplified to Z
16641 // Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
16642 static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
16643  const PPCSubtarget &Subtarget) {
16644  if (!Subtarget.isPPC64())
16645  return SDValue();
16646 
16647  SDValue LHS = N->getOperand(0);
16648  SDValue RHS = N->getOperand(1);
16649 
16650  auto isZextOfCompareWithConstant = [](SDValue Op) {
16651  if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
16652  Op.getValueType() != MVT::i64)
16653  return false;
16654 
16655  SDValue Cmp = Op.getOperand(0);
16656  if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
16657  Cmp.getOperand(0).getValueType() != MVT::i64)
16658  return false;
16659 
16660  if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
16661  int64_t NegConstant = 0 - Constant->getSExtValue();
16662  // Due to the limitations of the addi instruction,
16663  // -C is required to be [-32768, 32767].
16664  return isInt<16>(NegConstant);
16665  }
16666 
16667  return false;
16668  };
16669 
16670  bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
16671  bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
16672 
16673  // If there is a pattern, canonicalize a zext operand to the RHS.
16674  if (LHSHasPattern && !RHSHasPattern)
16675  std::swap(LHS, RHS);
16676  else if (!LHSHasPattern && !RHSHasPattern)
16677  return SDValue();
16678 
16679  SDLoc DL(N);
16680  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
16681  SDValue Cmp = RHS.getOperand(0);
16682  SDValue Z = Cmp.getOperand(0);
16683  auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
16684  int64_t NegConstant = 0 - Constant->getSExtValue();
16685 
16686  switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
16687  default: break;
16688  case ISD::SETNE: {
16689  // when C == 0
16690  // --> addze X, (addic Z, -1).carry
16691  // /
16692  // add X, (zext(setne Z, C))--
16693  // \ when -32768 <= -C <= 32767 && C != 0
16694  // --> addze X, (addic (addi Z, -C), -1).carry
16695  SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
16696  DAG.getConstant(NegConstant, DL, MVT::i64));
16697  SDValue AddOrZ = NegConstant != 0 ? Add : Z;
16698  SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
16699  AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
16700  return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
16701  SDValue(Addc.getNode(), 1));
16702  }
16703  case ISD::SETEQ: {
16704  // when C == 0
16705  // --> addze X, (subfic Z, 0).carry
16706  // /
16707  // add X, (zext(sete Z, C))--
16708  // \ when -32768 <= -C <= 32767 && C != 0
16709  // --> addze X, (subfic (addi Z, -C), 0).carry
16710  SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
16711  DAG.getConstant(NegConstant, DL, MVT::i64));
16712  SDValue AddOrZ = NegConstant != 0 ? Add : Z;
16713  SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
16714  DAG.getConstant(0, DL, MVT::i64), AddOrZ);
16715  return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
16716  SDValue(Subc.getNode(), 1));
16717  }
16718  }
16719 
16720  return SDValue();
16721 }
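// Concrete instance of the SETNE case above (illustrative, C == 0):
//   add X, (zext (setne Z, 0))
// is turned into roughly
//   addic  rT, Z, -1     // carry is set iff Z != 0
//   addze  rD, X         // rD = X + carry
// avoiding a compare-and-select sequence.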
16722 
16723 // Transform
16724 // (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
16725 // (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
16726 // In this case both C1 and C2 must be known constants.
16727 // C1+C2 must fit into a 34 bit signed integer.
16728 static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
16729  const PPCSubtarget &Subtarget) {
16730  if (!Subtarget.isUsingPCRelativeCalls())
16731  return SDValue();
16732 
16733  // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
16734  // If we find that node try to cast the Global Address and the Constant.
16735  SDValue LHS = N->getOperand(0);
16736  SDValue RHS = N->getOperand(1);
16737 
16738  if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
16739  std::swap(LHS, RHS);
16740 
16741  if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
16742  return SDValue();
16743 
16744  // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
16745  GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
16746  ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);
16747 
16748  // Check that both casts succeeded.
16749  if (!GSDN || !ConstNode)
16750  return SDValue();
16751 
16752  int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
16753  SDLoc DL(GSDN);
16754 
16755  // The signed int offset needs to fit in 34 bits.
16756  if (!isInt<34>(NewOffset))
16757  return SDValue();
16758 
16759  // The new global address is a copy of the old global address except
16760  // that it has the updated Offset.
16761  SDValue GA =
16762  DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
16763  NewOffset, GSDN->getTargetFlags());
16764  SDValue MatPCRel =
16765  DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
16766  return MatPCRel;
16767 }
16768 
16769 SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
16770  if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
16771  return Value;
16772 
16773  if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
16774  return Value;
16775 
16776  return SDValue();
16777 }
16778 
16779 // Detect TRUNCATE operations on bitcasts of float128 values.
16780 // What we are looking for here is the situation where we extract a subset
16781 // of bits from a 128 bit float.
16782 // This can be of two forms:
16783 // 1) BITCAST of f128 feeding TRUNCATE
16784 // 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
16785 // The reason this is required is because we do not have a legal i128 type
16786 // and so we want to prevent having to store the f128 and then reload part
16787 // of it.
16788 SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
16789  DAGCombinerInfo &DCI) const {
16790  // If we are using CRBits then try that first.
16791  if (Subtarget.useCRBits()) {
16792  // Check if CRBits did anything and return that if it did.
16793  if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
16794  return CRTruncValue;
16795  }
16796 
16797  SDLoc dl(N);
16798  SDValue Op0 = N->getOperand(0);
16799 
16800  // fold (truncate (abs (sub (zext a), (zext b)))) -> (vabsd a, b)
16801  if (Subtarget.hasP9Altivec() && Op0.getOpcode() == ISD::ABS) {
16802  EVT VT = N->getValueType(0);
16803  if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
16804  return SDValue();
16805  SDValue Sub = Op0.getOperand(0);
16806  if (Sub.getOpcode() == ISD::SUB) {
16807  SDValue SubOp0 = Sub.getOperand(0);
16808  SDValue SubOp1 = Sub.getOperand(1);
16809  if ((SubOp0.getOpcode() == ISD::ZERO_EXTEND) &&
16810  (SubOp1.getOpcode() == ISD::ZERO_EXTEND)) {
16811  return DCI.DAG.getNode(PPCISD::VABSD, dl, VT, SubOp0.getOperand(0),
16812  SubOp1.getOperand(0),
16813  DCI.DAG.getTargetConstant(0, dl, MVT::i32));
16814  }
16815  }
16816  }
16817 
16818  // Looking for a truncate of i128 to i64.
16819  if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
16820  return SDValue();
16821 
16822  int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
16823 
16824  // SRL feeding TRUNCATE.
16825  if (Op0.getOpcode() == ISD::SRL) {
16826  ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
16827  // The right shift has to be by 64 bits.
16828  if (!ConstNode || ConstNode->getZExtValue() != 64)
16829  return SDValue();
16830 
16831  // Switch the element number to extract.
16832  EltToExtract = EltToExtract ? 0 : 1;
16833  // Update Op0 past the SRL.
16834  Op0 = Op0.getOperand(0);
16835  }
16836 
16837  // BITCAST feeding a TRUNCATE possibly via SRL.
16838  if (Op0.getOpcode() == ISD::BITCAST &&
16839  Op0.getValueType() == MVT::i128 &&
16840  Op0.getOperand(0).getValueType() == MVT::f128) {
16841  SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
16842  return DCI.DAG.getNode(
16843  ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
16844  DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
16845  }
16846  return SDValue();
16847 }
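// Example of form 2 described above (a sketch): for
//   (trunc (srl (bitcast f128:x to i128), 64) to i64)
// the code bitcasts x to v2i64 and extracts one element directly, so the f128
// value never has to be spilled and reloaded; which element is taken depends
// on the endianness of the target.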
16848 
16849 SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
16850  SelectionDAG &DAG = DCI.DAG;
16851 
16852  ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
16853  if (!ConstOpOrElement)
16854  return SDValue();
16855 
16856  // An imul is usually smaller than the alternative sequence for legal type.
16857  if (DAG.getMachineFunction().getFunction().hasMinSize() &&
16858  isOperationLegal(ISD::MUL, N->getValueType(0)))
16859  return SDValue();
16860 
16861  auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
16862  switch (this->Subtarget.getCPUDirective()) {
16863  default:
16864  // TODO: enhance the condition for subtarget before pwr8
16865  return false;
16866  case PPC::DIR_PWR8:
16867  // type mul add shl
16868  // scalar 4 1 1
16869  // vector 7 2 2
16870  return true;
16871  case PPC::DIR_PWR9:
16872  case PPC::DIR_PWR10:
16873  case PPC::DIR_PWR_FUTURE:
16874  // type mul add shl
16875  // scalar 5 2 2
16876  // vector 7 2 2
16877 
16878  // The cycle ratios of the related operations are shown in the table above.
16879  // Because mul is 5(scalar)/7(vector), add/sub/shl are all 2 for both
16880  // scalar and vector type. For 2 instrs patterns, add/sub + shl
16881  // are 4, it is always profitable; but for 3 instrs patterns
16882  // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl are 6.
16883  // So we should only do it for vector type.
16884  return IsAddOne && IsNeg ? VT.isVector() : true;
16885  }
16886  };
16887 
16888  EVT VT = N->getValueType(0);
16889  SDLoc DL(N);
16890 
16891  const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
16892  bool IsNeg = MulAmt.isNegative();
16893  APInt MulAmtAbs = MulAmt.abs();
16894 
16895  if ((MulAmtAbs - 1).isPowerOf2()) {
16896  // (mul x, 2^N + 1) => (add (shl x, N), x)
16897  // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
16898 
16899  if (!IsProfitable(IsNeg, true, VT))
16900  return SDValue();
16901 
16902  SDValue Op0 = N->getOperand(0);
16903  SDValue Op1 =
16904  DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16905  DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
16906  SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
16907 
16908  if (!IsNeg)
16909  return Res;
16910 
16911  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
16912  } else if ((MulAmtAbs + 1).isPowerOf2()) {
16913  // (mul x, 2^N - 1) => (sub (shl x, N), x)
16914  // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
16915 
16916  if (!IsProfitable(IsNeg, false, VT))
16917  return SDValue();
16918 
16919  SDValue Op0 = N->getOperand(0);
16920  SDValue Op1 =
16921  DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16922  DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
16923 
16924  if (!IsNeg)
16925  return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
16926  else
16927  return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
16928 
16929  } else {
16930  return SDValue();
16931  }
16932 }
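// For illustration, assuming a pwr9 or later CPU directive, the strength
// reduction above produces:
//   (mul x, 5)  -> (add (shl x, 2), x)            ; 5 = 2^2 + 1
//   (mul x, 7)  -> (sub (shl x, 3), x)            ; 7 = 2^3 - 1
//   (mul x, -7) -> (sub x, (shl x, 3))
//   (mul x, -5) -> (sub 0, (add (shl x, 2), x))   ; vector types only
// The last pattern is rejected for scalars on pwr9+ because its three
// instructions are no cheaper than a single multiply there.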
16933 
16934 // Combine an fma-like op (such as fnmsub) with fnegs into the appropriate op.
16935 // Do this in the combiner since we need to check SD flags and other subtarget features.
16936 SDValue PPCTargetLowering::combineFMALike(SDNode *N,
16937  DAGCombinerInfo &DCI) const {
16938  SDValue N0 = N->getOperand(0);
16939  SDValue N1 = N->getOperand(1);
16940  SDValue N2 = N->getOperand(2);
16941  SDNodeFlags Flags = N->getFlags();
16942  EVT VT = N->getValueType(0);
16943  SelectionDAG &DAG = DCI.DAG;
16944  const TargetOptions &Options = getTargetMachine().Options;
16945  unsigned Opc = N->getOpcode();
16946  bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
16947  bool LegalOps = !DCI.isBeforeLegalizeOps();
16948  SDLoc Loc(N);
16949 
16950  if (!isOperationLegal(ISD::FMA, VT))
16951  return SDValue();
16952 
16953  // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
16954  // since (fnmsub a b c)=-0 while c-ab=+0.
16955  if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
16956  return SDValue();
16957 
16958  // (fma (fneg a) b c) => (fnmsub a b c)
16959  // (fnmsub (fneg a) b c) => (fma a b c)
16960  if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
16961  return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
16962 
16963  // (fma a (fneg b) c) => (fnmsub a b c)
16964  // (fnmsub a (fneg b) c) => (fma a b c)
16965  if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
16966  return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
16967 
16968  return SDValue();
16969 }
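// For reference: on PPC, fnmsub computes -(a*b - c), so negating either
// multiplicand flips exactly between the two opcodes,
//   fma((-a), b, c) = -(a*b) + c = -(a*b - c) = fnmsub(a, b, c),
// which is why invertFMAOpcode suffices here; the only caveat is the sign of
// zero when a*b == c, guarded by the no-signed-zeros check above.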
16970 
16971 bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
16972  // Only duplicate to increase tail-calls for the 64-bit SysV ABIs.
16973  if (!Subtarget.is64BitELFABI())
16974  return false;
16975 
16976  // If not a tail call then no need to proceed.
16977  if (!CI->isTailCall())
16978  return false;
16979 
16980  // If sibling calls have been disabled and tail-calls aren't guaranteed
16981  // there is no reason to duplicate.
16982  auto &TM = getTargetMachine();
16983  if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
16984  return false;
16985 
16986  // Can't tail call a function called indirectly, or if it has variadic args.
16987  const Function *Callee = CI->getCalledFunction();
16988  if (!Callee || Callee->isVarArg())
16989  return false;
16990 
16991  // Make sure the callee and caller calling conventions are eligible for tco.
16992  const Function *Caller = CI->getParent()->getParent();
16993  if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
16994  CI->getCallingConv()))
16995  return false;
16996 
16997  // If the function is local then we have a good chance at tail-calling it
16998  return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
16999 }
17000 
17001 bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
17002  if (!Subtarget.hasVSX())
17003  return false;
17004  if (Subtarget.hasP9Vector() && VT == MVT::f128)
17005  return true;
17006  return VT == MVT::f32 || VT == MVT::f64 ||
17007  VT == MVT::v4f32 || VT == MVT::v2f64;
17008 }
17009 
17010 bool PPCTargetLowering::
17011 isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
17012  const Value *Mask = AndI.getOperand(1);
17013  // If the mask is suitable for andi. or andis. we should sink the and.
17014  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
17015  // Can't handle constants wider than 64-bits.
17016  if (CI->getBitWidth() > 64)
17017  return false;
17018  int64_t ConstVal = CI->getZExtValue();
17019  return isUInt<16>(ConstVal) ||
17020  (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
17021  }
17022 
17023  // For non-constant masks, we can always use the record-form and.
17024  return true;
17025 }
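// For illustration, masks accepted by the check above (and therefore worth
// sinking next to the compare) include:
//   0x0000FFFF -> fits andi. (low 16 bits)
//   0x00FF0000 -> fits andis. (high 16 bits set, low half clear)
//   0x00010001 -> needs two instructions, so the and is not sunk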
17026 
17027 // Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)
17028 // Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)
17029 // Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)
17030 // Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)
17031 // Transform (abs (sub a, b)) to (vabsd a b 1) if a and b are of type v4i32
17032 SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
17033  assert((N->getOpcode() == ISD::ABS) && "Need ABS node here");
17034  assert(Subtarget.hasP9Altivec() &&
17035  "Only combine this when P9 altivec supported!");
17036  EVT VT = N->getValueType(0);
17037  if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
17038  return SDValue();
17039 
17040  SelectionDAG &DAG = DCI.DAG;
17041  SDLoc dl(N);
17042  if (N->getOperand(0).getOpcode() == ISD::SUB) {
17043  // Even though ABS is a signed operation, the transform is valid here
17044  // because both subtraction operands are zero-extended and hence non-negative.
17045  unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
17046  unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
17047  if ((SubOpcd0 == ISD::ZERO_EXTEND ||
17048  SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&
17049  (SubOpcd1 == ISD::ZERO_EXTEND ||
17050  SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {
17051  return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
17052  N->getOperand(0)->getOperand(0),
17053  N->getOperand(0)->getOperand(1),
17054  DAG.getTargetConstant(0, dl, MVT::i32));
17055  }
17056 
17057  // For type v4i32, it can be optimized with xvnegsp + vabsduw
17058  if (N->getOperand(0).getValueType() == MVT::v4i32 &&
17059  N->getOperand(0).hasOneUse()) {
17060  return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
17061  N->getOperand(0)->getOperand(0),
17062  N->getOperand(0)->getOperand(1),
17063  DAG.getTargetConstant(1, dl, MVT::i32));
17064  }
17065  }
17066 
17067  return SDValue();
17068 }
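// For illustration: with the swap flag set to 1 (the v4i32 case above), VABSD
// is expected to lower to xvnegsp on both inputs followed by vabsduw; flipping
// the sign bit of each word biases both operands by the same amount, so the
// unsigned absolute difference of the biased values still equals |a - b| for
// signed v4i32 inputs.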
17069 
17070 // For type v4i32/v8i16/v16i8, transform
17071 // from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
17072 // from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
17073 // from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
17074 // from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
17075 SDValue PPCTargetLowering::combineVSelect(SDNode *N,
17076  DAGCombinerInfo &DCI) const {
17077  assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
17078  assert(Subtarget.hasP9Altivec() &&
17079  "Only combine this when P9 altivec supported!");
17080 
17081  SelectionDAG &DAG = DCI.DAG;
17082  SDLoc dl(N);
17083  SDValue Cond = N->getOperand(0);
17084  SDValue TrueOpnd = N->getOperand(1);
17085  SDValue FalseOpnd = N->getOperand(2);
17086  EVT VT = N->getOperand(1).getValueType();
17087 
17088  if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
17089  FalseOpnd.getOpcode() != ISD::SUB)
17090  return SDValue();
17091 
17092  // ABSD only available for type v4i32/v8i16/v16i8
17093  if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
17094  return SDValue();
17095 
17096  // Require one-use on at least one operand so we save a dependent computation.
17097  if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
17098  return SDValue();
17099 
17100  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
17101 
17102  // Can only handle unsigned comparison here
17103  switch (CC) {
17104  default:
17105  return SDValue();
17106  case ISD::SETUGT:
17107  case ISD::SETUGE:
17108  break;
17109  case ISD::SETULT:
17110  case ISD::SETULE:
17111  std::swap(TrueOpnd, FalseOpnd);
17112  break;
17113  }
17114 
17115  SDValue CmpOpnd1 = Cond.getOperand(0);
17116  SDValue CmpOpnd2 = Cond.getOperand(1);
17117 
17118  // SETCC CmpOpnd1 CmpOpnd2 cond
17119  // TrueOpnd = CmpOpnd1 - CmpOpnd2
17120  // FalseOpnd = CmpOpnd2 - CmpOpnd1
17121  if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
17122  TrueOpnd.getOperand(1) == CmpOpnd2 &&
17123  FalseOpnd.getOperand(0) == CmpOpnd2 &&
17124  FalseOpnd.getOperand(1) == CmpOpnd1) {
17125  return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),
17126  CmpOpnd1, CmpOpnd2,
17127  DAG.getTargetConstant(0, dl, MVT::i32));
17128  }
17129 
17130  return SDValue();
17131 }
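// For illustration: the guarded pattern is the classic unsigned
// absolute-difference idiom,
//   (vselect (setcc a, b, setugt), (sub a, b), (sub b, a))
//     == max(a, b) - min(a, b) == |a - b|   (element-wise, unsigned),
// which maps to a single vabsdub/vabsduh/vabsduw depending on the element type.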
17132 
17133 /// getAddrModeForFlags - Based on the set of address flags, select the
17134 /// optimal instruction format to match against.
17135 PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
17136  // This is not a node we should be handling here.
17137  if (Flags == PPC::MOF_None)
17138  return PPC::AM_None;
17139  // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
17140  for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
17141  if ((Flags & FlagSet) == FlagSet)
17142  return PPC::AM_DForm;
17143  for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
17144  if ((Flags & FlagSet) == FlagSet)
17145  return PPC::AM_DSForm;
17146  for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
17147  if ((Flags & FlagSet) == FlagSet)
17148  return PPC::AM_DQForm;
17149  // If no other forms are selected, return an X-Form as it is the most
17150  // general addressing mode.
17151  return PPC::AM_XForm;
17152 }
17153 
17154 /// Set alignment flags based on whether or not the Frame Index is aligned.
17155 /// Utilized when computing flags for address computation when selecting
17156 /// load and store instructions.
17157 static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
17158  SelectionDAG &DAG) {
17159  bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
17160  FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
17161  if (!FI)
17162  return;
17163  const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
17164  unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
17165  // If this is (add $FI, $S16Imm), the alignment flags are already set
17166  // based on the immediate. We just need to clear the alignment flags
17167  // if the FI alignment is weaker.
17168  if ((FrameIndexAlign % 4) != 0)
17169  FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
17170  if ((FrameIndexAlign % 16) != 0)
17171  FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
17172  // If the address is a plain FrameIndex, set alignment flags based on
17173  // FI alignment.
17174  if (!IsAdd) {
17175  if ((FrameIndexAlign % 4) == 0)
17176  FlagSet |= PPC::MOF_RPlusSImm16Mult4;
17177  if ((FrameIndexAlign % 16) == 0)
17178  FlagSet |= PPC::MOF_RPlusSImm16Mult16;
17179  }
17180 }
17181 
17182 /// Given a node, compute flags that are used for address computation when
17183 /// selecting load and store instructions. The flags computed are stored in
17184 /// FlagSet. This function takes into account whether the node is a constant,
17185 /// an ADD, an OR, or something else, and computes the address flags accordingly.
17186 static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
17187  SelectionDAG &DAG) {
17188  // Set the alignment flags for the node depending on if the node is
17189  // 4-byte or 16-byte aligned.
17190  auto SetAlignFlagsForImm = [&](uint64_t Imm) {
17191  if ((Imm & 0x3) == 0)
17192  FlagSet |= PPC::MOF_RPlusSImm16Mult4;
17193  if ((Imm & 0xf) == 0)
17194  FlagSet |= PPC::MOF_RPlusSImm16Mult16;
17195  };
17196 
17197  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
17198  // All 32-bit constants can be computed as LIS + Disp.
17199  const APInt &ConstImm = CN->getAPIntValue();
17200  if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
17201  FlagSet |= PPC::MOF_AddrIsSImm32;
17202  SetAlignFlagsForImm(ConstImm.getZExtValue());
17203  setAlignFlagsForFI(N, FlagSet, DAG);
17204  }
17205  if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
17206  FlagSet |= PPC::MOF_RPlusSImm34;
17207  else // Let constant materialization handle large constants.
17208  FlagSet |= PPC::MOF_NotAddNorCst;
17209  } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
17210  // This address can be represented as an addition of:
17211  // - Register + Imm16 (possibly a multiple of 4/16)
17212  // - Register + Imm34
17213  // - Register + PPCISD::Lo
17214  // - Register + Register
17215  // In any case, we won't have to match this as Base + Zero.
17216  SDValue RHS = N.getOperand(1);
17217  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(RHS)) {
17218  const APInt &ConstImm = CN->getAPIntValue();
17219  if (ConstImm.isSignedIntN(16)) {
17220  FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
17221  SetAlignFlagsForImm(ConstImm.getZExtValue());
17222  setAlignFlagsForFI(N, FlagSet, DAG);
17223  }
17224  if (ConstImm.isSignedIntN(34))
17225  FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
17226  else
17227  FlagSet |= PPC::MOF_RPlusR; // Register.
17228  } else if (RHS.getOpcode() == PPCISD::Lo &&
17229  !cast<ConstantSDNode>(RHS.getOperand(1))->getZExtValue())
17230  FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
17231  else
17232  FlagSet |= PPC::MOF_RPlusR;
17233  } else { // The address computation is not a constant or an addition.
17234  setAlignFlagsForFI(N, FlagSet, DAG);
17235  FlagSet |= PPC::MOF_NotAddNorCst;
17236  }
17237 }
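// For illustration, assuming N = (add r3, 40): the constant fits in 16 (and
// 34) bits, so MOF_RPlusSImm16 and MOF_RPlusSImm34 are set, and since 40 is a
// multiple of 4 but not of 16 only MOF_RPlusSImm16Mult4 is added; a
// displacement of 48 would also set MOF_RPlusSImm16Mult16.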
17238 
17239 /// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
17240 /// the address flags of the load/store instruction that is to be matched.
17241 unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
17242  SelectionDAG &DAG) const {
17243  unsigned FlagSet = PPC::MOF_None;
17244 
17245  // Compute subtarget flags.
17246  if (!Subtarget.hasP9Vector())
17247  FlagSet |= PPC::MOF_SubtargetBeforeP9;
17248  else {
17249  FlagSet |= PPC::MOF_SubtargetP9;
17250  if (Subtarget.hasPrefixInstrs())
17251  FlagSet |= PPC::MOF_SubtargetP10;
17252  }
17253  if (Subtarget.hasSPE())
17254  FlagSet |= PPC::MOF_SubtargetSPE;
17255 
17256  // Mark this as something we don't want to handle here if it is an atomic
17257  // or pre-increment instruction.
17258  if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
17259  if (LSB->isIndexed())
17260  return PPC::MOF_None;
17261 
17262  // Compute in-memory type flags. This is based on whether the memory type is
17263  // a scalar integer, a float, or a vector.
17264  const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
17265  assert(MN && "Parent should be a MemSDNode!");
17266  EVT MemVT = MN->getMemoryVT();
17267  unsigned Size = MemVT.getSizeInBits();
17268  if (MemVT.isScalarInteger()) {
17269  assert(Size <= 64 && "Not expecting scalar integers larger than 8 bytes!");
17270  if (Size < 32)
17271  FlagSet |= PPC::MOF_SubWordInt;
17272  else if (Size == 32)
17273  FlagSet |= PPC::MOF_WordInt;
17274  else
17275  FlagSet |= PPC::MOF_DoubleWordInt;
17276  } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
17277  if (Size == 128)
17278  FlagSet |= PPC::MOF_Vector;
17279  else if (Size == 256)
17280  FlagSet |= PPC::MOF_Vector256;
17281  else
17282  llvm_unreachable("Not expecting illegal vectors!");
17283  } else { // Floating point type: can be scalar, f128 or vector types.
17284  if (Size == 32 || Size == 64)
17285  FlagSet |= PPC::MOF_ScalarFloat;
17286  else if (MemVT == MVT::f128 || MemVT.isVector())
17287  FlagSet |= PPC::MOF_Vector;
17288  else
17289  llvm_unreachable("Not expecting illegal scalar floats!");
17290  }
17291 
17292  // Compute flags for address computation.
17293  computeFlagsForAddressComputation(N, FlagSet, DAG);
17294 
17295  // Compute type extension flags.
17296  if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
17297  switch (LN->getExtensionType()) {
17298  case ISD::SEXTLOAD:
17299  FlagSet |= PPC::MOF_SExt;
17300  break;
17301  case ISD::EXTLOAD:
17302  case ISD::ZEXTLOAD:
17303  FlagSet |= PPC::MOF_ZExt;
17304  break;
17305  case ISD::NON_EXTLOAD:
17306  FlagSet |= PPC::MOF_NoExt;
17307  break;
17308  }
17309  } else
17310  FlagSet |= PPC::MOF_NoExt;
17311 
17312  // For integers, no extension is the same as zero extension.
17313  // We set the extension mode to zero extension so we don't have
17314  // to add separate entries in AddrModesMap for loads and stores.
17315  if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
17316  FlagSet |= PPC::MOF_ZExt;
17317  FlagSet &= ~PPC::MOF_NoExt;
17318  }
17319 
17320  // If we don't have prefixed instructions, 34-bit constants should be
17321  // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
17322  bool IsNonP1034BitConst =
17324  FlagSet) == PPC::MOF_RPlusSImm34;
17325  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
17326  IsNonP1034BitConst)
17327  FlagSet |= PPC::MOF_NotAddNorCst;
17328 
17329  return FlagSet;
17330 }
17331 
17332 /// SelectForceXFormMode - Given the specified address, force it to be
17333 /// represented as an indexed [r+r] operation (an XForm instruction).
17334 PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp,
17335  SDValue &Base,
17336  SelectionDAG &DAG) const {
17337 
17338  PPC::AddrMode Mode = PPC::AM_XForm;
17339  int16_t ForceXFormImm = 0;
17340  if (provablyDisjointOr(DAG, N) &&
17341  !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
17342  Disp = N.getOperand(0);
17343  Base = N.getOperand(1);
17344  return Mode;
17345  }
17346 
17347  // If the address is the result of an add, we will utilize the fact that the
17348  // address calculation includes an implicit add. However, we can reduce
17349  // register pressure if we do not materialize a constant just for use as the
17350  // index register. We only get rid of the add if it is not an add of a
17351  // value and a 16-bit signed constant and both have a single use.
17352  if (N.getOpcode() == ISD::ADD &&
17353  (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
17354  !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
17355  Disp = N.getOperand(0);
17356  Base = N.getOperand(1);
17357  return Mode;
17358  }
17359 
17360  // Otherwise, use R0 as the base register.
17361  Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
17362  N.getValueType());
17363  Base = N;
17364 
17365  return Mode;
17366 }
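// For illustration: for N = (add r3, r4), where the second operand is not a
// small constant, the helper above returns Disp = r3 and Base = r4 so the
// access is matched as an indexed [r+r] X-Form; for a lone register or frame
// index it falls back to Base = N with the zero register in the Disp slot.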
17367 
17368 // If we happen to match to an aligned D-Form, check if the Frame Index is
17369 // adequately aligned. If it is not, reset the mode to match to X-Form.
17370 static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
17371  PPC::AddrMode &Mode) {
17372  if (!isa<FrameIndexSDNode>(N))
17373  return;
17374  if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
17375  (Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
17376  Mode = PPC::AM_XForm;
17377 }
17378 
17379 /// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
17380 /// compute the address flags of the node, get the optimal address mode based
17381 /// on the flags, and set the Base and Disp based on the address mode.
17382 PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
17383  SDValue N, SDValue &Disp,
17384  SDValue &Base,
17385  SelectionDAG &DAG,
17386  MaybeAlign Align) const {
17387  SDLoc DL(Parent);
17388 
17389  // Compute the address flags.
17390  unsigned Flags = computeMOFlags(Parent, N, DAG);
17391 
17392  // Get the optimal address mode based on the Flags.
17393  PPC::AddrMode Mode = getAddrModeForFlags(Flags);
17394 
17395  // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
17396  // Select an X-Form load if it is not.
17397  setXFormForUnalignedFI(N, Flags, Mode);
17398 
17399  // Set Base and Disp accordingly depending on the address mode.
17400  switch (Mode) {
17401  case PPC::AM_DForm:
17402  case PPC::AM_DSForm:
17403  case PPC::AM_DQForm: {
17404  // This is a register plus a 16-bit immediate. The base will be the
17405  // register and the displacement will be the immediate unless it
17406  // isn't sufficiently aligned.
17407  if (Flags & PPC::MOF_RPlusSImm16) {
17408  SDValue Op0 = N.getOperand(0);
17409  SDValue Op1 = N.getOperand(1);
17410  int16_t Imm = cast<ConstantSDNode>(Op1)->getAPIntValue().getZExtValue();
17411  if (!Align || isAligned(*Align, Imm)) {
17412  Disp = DAG.getTargetConstant(Imm, DL, N.getValueType());
17413  Base = Op0;
17414  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0)) {
17415  Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
17416  fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
17417  }
17418  break;
17419  }
17420  }
17421  // This is a register plus the @lo relocation. The base is the register
17422  // and the displacement is the global address.
17423  else if (Flags & PPC::MOF_RPlusLo) {
17424  Disp = N.getOperand(1).getOperand(0); // The global address.
17425  assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
17426  Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
17427  Disp.getOpcode() == ISD::TargetConstantPool ||
17428  Disp.getOpcode() == ISD::TargetJumpTable);
17429  Base = N.getOperand(0);
17430  break;
17431  }
17432  // This is a constant address at most 32 bits. The base will be
17433  // zero or load-immediate-shifted and the displacement will be
17434  // the low 16 bits of the address.
17435  else if (Flags & PPC::MOF_AddrIsSImm32) {
17436  auto *CN = cast<ConstantSDNode>(N);
17437  EVT CNType = CN->getValueType(0);
17438  uint64_t CNImm = CN->getZExtValue();
17439  // If this address fits entirely in a 16-bit sext immediate field, codegen
17440  // this as "d, 0".
17441  int16_t Imm;
17442  if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
17443  Disp = DAG.getTargetConstant(Imm, DL, CNType);
17444  Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
17445  CNType);
17446  break;
17447  }
17448  // Handle 32-bit sext immediate with LIS + Addr mode.
17449  if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
17450  (!Align || isAligned(*Align, CNImm))) {
17451  int32_t Addr = (int32_t)CNImm;
17452  // Otherwise, break this down into LIS + Disp.
17453  Disp = DAG.getTargetConstant((int16_t)Addr, DL, MVT::i32);
17454  Base =
17455  DAG.getTargetConstant((Addr - (int16_t)Addr) >> 16, DL, MVT::i32);
17456  uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
17457  Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
17458  break;
17459  }
17460  }
17461  // Otherwise, the PPC::MOF_NotAddNorCst flag is set. Load/Store is non-foldable.
17462  Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
17463  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
17464  Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
17465  fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
17466  } else
17467  Base = N;
17468  break;
17469  }
17470  case PPC::AM_None:
17471  break;
17472  default: { // By default, X-Form is always available to be selected.
17473  // When a frame index is not aligned, we also match by XForm.
17474  FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
17475  Base = FI ? N : N.getOperand(1);
17476  Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
17477  N.getValueType())
17478  : N.getOperand(0);
17479  break;
17480  }
17481  }
17482  return Mode;
17483 }
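// For illustration, assuming an 8-byte integer load from (add r3, 40) on a
// pre-P10 subtarget: computeMOFlags yields double-word-integer, zero-extension
// and R+SImm16(Mult4) flags, getAddrModeForFlags is then expected to pick the
// DS-Form bucket, and the switch above sets Base = r3 and Disp = 40, matching
// an `ld 40(r3)`-style access.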
17484 
17485 CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
17486  bool Return,
17487  bool IsVarArg) const {
17488  switch (CC) {
17489  case CallingConv::Cold:
17490  return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF_FIS);
17491  default:
17492  return CC_PPC64_ELF_FIS;
17493  }
17494 }
17495 
17496 TargetLowering::AtomicExpansionKind
17497 PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
17498  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
17499  if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() && Size == 128)
17500  return AtomicExpansionKind::MaskedIntrinsic;
17501  return TargetLowering::shouldExpandAtomicRMWInIR(AI);
17502 }
17503 
17504 TargetLowering::AtomicExpansionKind
17505 PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
17506  unsigned Size = AI->getPointerOperand()
17507  ->getType()
17508  ->getPointerElementType()
17509  ->getPrimitiveSizeInBits();
17510  if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() && Size == 128)
17511  return AtomicExpansionKind::MaskedIntrinsic;
17512  return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI);
17513 }
17514 
17515 static Intrinsic::ID
17516 getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
17517  switch (BinOp) {
17518  default:
17519  llvm_unreachable("Unexpected AtomicRMW BinOp");
17520  case AtomicRMWInst::Xchg:
17521  return Intrinsic::ppc_atomicrmw_xchg_i128;
17522  case AtomicRMWInst::Add:
17523  return Intrinsic::ppc_atomicrmw_add_i128;
17524  case AtomicRMWInst::Sub:
17525  return Intrinsic::ppc_atomicrmw_sub_i128;
17526  case AtomicRMWInst::And:
17527  return Intrinsic::ppc_atomicrmw_and_i128;
17528  case AtomicRMWInst::Or:
17529  return Intrinsic::ppc_atomicrmw_or_i128;
17530  case AtomicRMWInst::Xor:
17531  return Intrinsic::ppc_atomicrmw_xor_i128;
17532  case AtomicRMWInst::Nand:
17533  return Intrinsic::ppc_atomicrmw_nand_i128;
17534  }
17535 }
17536 
17537 Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
17538  IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
17539  Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
17540  assert(EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() &&
17541  "Only support quadword now");
17542  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
17543  Type *ValTy = cast<PointerType>(AlignedAddr->getType())->getElementType();
17544  assert(ValTy->getPrimitiveSizeInBits() == 128);
17545  Function *RMW = Intrinsic::getDeclaration(
17546  M, getIntrinsicForAtomicRMWBinOp128(AI->getOperation()));
17547  Type *Int64Ty = Type::getInt64Ty(M->getContext());
17548  Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
17549  Value *IncrHi =
17550  Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
17551  Value *Addr =
17552  Builder.CreateBitCast(AlignedAddr, Type::getInt8PtrTy(M->getContext()));
17553  Value *LoHi = Builder.CreateCall(RMW, {Addr, IncrLo, IncrHi});
17554  Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
17555  Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
17556  Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
17557  Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
17558  return Builder.CreateOr(
17559  Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
17560 }
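// For illustration, the IR emitted by the hook above for a 128-bit atomicrmw
// add is shaped roughly like (names abbreviated):
//   %lo = trunc i128 %incr to i64
//   %hi = trunc i128 (lshr i128 %incr, 64) to i64
//   %r  = call { i64, i64 } @llvm.ppc.atomicrmw.add.i128(i8* %addr, i64 %lo, i64 %hi)
// after which the two returned halves are zext/shl/or'ed back into an i128.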
17561 
17562 Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
17563  IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
17564  Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
17565  assert(EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() &&
17566  "Only support quadword now");
17567  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
17568  Type *ValTy = cast<PointerType>(AlignedAddr->getType())->getElementType();
17569  assert(ValTy->getPrimitiveSizeInBits() == 128);
17570  Function *IntCmpXchg =
17571  Intrinsic::getDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
17572  Type *Int64Ty = Type::getInt64Ty(M->getContext());
17573  Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
17574  Value *CmpHi =
17575  Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
17576  Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
17577  Value *NewHi =
17578  Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
17579  Value *Addr =
17580  Builder.CreateBitCast(AlignedAddr, Type::getInt8PtrTy(M->getContext()));
17581  emitLeadingFence(Builder, CI, Ord);
17582  Value *LoHi =
17583  Builder.CreateCall(IntCmpXchg, {Addr, CmpLo, CmpHi, NewLo, NewHi});
17584  emitTrailingFence(Builder, CI, Ord);
17585  Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
17586  Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
17587  Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
17588  Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
17589  return Builder.CreateOr(
17590  Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
17591 }
llvm::ISD::SETULE
@ SETULE
Definition: ISDOpcodes.h:1370
llvm::PPC::isXXPERMDIShuffleMask
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction.
Definition: PPCISelLowering.cpp:2342
MachineJumpTableInfo.h
llvm::TargetMachine::getRelocationModel
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
Definition: TargetMachine.cpp:70
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::PPCISD::FADDRTZ
@ FADDRTZ
F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding towards zero.
Definition: PPCISelLowering.h:294
DenseMap.h
llvm::KnownBits::getConstant
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition: KnownBits.h:57
llvm::TargetLoweringBase::getPrefLoopAlignment
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
Definition: TargetLowering.h:1771
llvm::BranchProbability::getZero
static BranchProbability getZero()
Definition: BranchProbability.h:49
Module.h
Module.h This file contains the declarations for the Module class.
llvm::PPCISD::Lo
@ Lo
Definition: PPCISelLowering.h:136
llvm::PPCII::MO_LO
@ MO_LO
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition: PPC.h:156
llvm::PPCTargetLowering::ccAssignFnForCall
CCAssignFn * ccAssignFnForCall(CallingConv::ID CC, bool Return, bool IsVarArg) const
Definition: PPCISelLowering.cpp:17485
llvm::ISD::SHL_PARTS
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:715
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:329
llvm::AttributeList
Definition: Attributes.h:398
llvm::tgtok::Bits
@ Bits
Definition: TGLexer.h:50
TargetInstrInfo.h
llvm::FloatToBits
uint32_t FloatToBits(float Float)
This function takes a float and returns the bit equivalent 32-bit integer.
Definition: MathExtras.h:663
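A minimal illustrative sketch (not part of this file): FloatToBits exposes the IEEE-754 bit pattern of a float.
  #include "llvm/Support/MathExtras.h"
  uint32_t Bits = llvm::FloatToBits(1.0f);   // Bits == 0x3F800000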
llvm::MemSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:1353
llvm::PPCSubtarget::enableMachineScheduler
bool enableMachineScheduler() const override
Scheduling customization.
Definition: PPCSubtarget.cpp:191
llvm::sys::path::end
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:233
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:128
llvm::SelectionDAG::getStore
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
Definition: SelectionDAG.cpp:7476
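A hedged sketch of building a store node, assuming SDValues Chain, Val, Ptr and a SelectionDAG DAG with SDLoc dl are already in scope (illustrative only, not code from this file):
  // Store Val through Ptr with 8-byte alignment; a default MachinePointerInfo is used here.
  SDValue St = DAG.getStore(Chain, dl, Val, Ptr,
                            MachinePointerInfo(), llvm::Align(8));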
llvm::PPCISD::XXSPLT
@ XXSPLT
XXSPLT - The PPC VSX splat instructions.
Definition: PPCISelLowering.h:104
llvm::PPCISD::LD_GOT_TPREL_L
@ LD_GOT_TPREL_L
G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec TLS model, produces a LD instruction ...
Definition: PPCISelLowering.h:327
llvm::PPCISD::FCFIDU
@ FCFIDU
Newer FCFID[US] integer-to-floating-point conversion instructions for unsigned integers and single-pr...
Definition: PPCISelLowering.h:65
llvm::MachineMemOperand::MODereferenceable
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
Definition: MachineMemOperand.h:143
isConstantOrUndef
static bool isConstantOrUndef(int Op, int Val)
isConstantOrUndef - Op is either an undef node or a ConstantSDNode.
Definition: PPCISelLowering.cpp:1766
llvm::MachineRegisterInfo::getLiveInVirtReg
Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual ...
Definition: MachineRegisterInfo.cpp:454
llvm::SelectionDAG::isBaseWithConstantOffset
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
Definition: SelectionDAG.cpp:4306
llvm::MachineFunction::insert
void insert(iterator MBBI, MachineBasicBlock *MBB)
Definition: MachineFunction.h:823
llvm::PPCISD::STRICT_FCTIDZ
@ STRICT_FCTIDZ
Definition: PPCISelLowering.h:483
llvm::PPCISD::FNMSUB
@ FNMSUB
FNMSUB - Negated multiply-subtract instruction.
Definition: PPCISelLowering.h:170
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
llvm::ISD::SETCC
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:702
mapArgRegToOffsetAIX
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL)
Definition: PPCISelLowering.cpp:6794
llvm::PPCTargetLowering::computeKnownBitsForTargetNode
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
Definition: PPCISelLowering.cpp:15510
llvm::PPCISD::XXSPLTI32DX
@ XXSPLTI32DX
XXSPLTI32DX - The PPC XXSPLTI32DX instruction.
Definition: PPCISelLowering.h:113
llvm::ISD::CALLSEQ_START
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
Definition: ISDOpcodes.h:1026
llvm::PPCII::MO_GOT_TLSGD_PCREL_FLAG
@ MO_GOT_TLSGD_PCREL_FLAG
MO_GOT_TLSGD_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition: PPC.h:140
llvm::CCState::getMachineFunction
MachineFunction & getMachineFunction() const
Definition: CallingConvLower.h:258
llvm::PPC::MOF_RPlusSImm34
@ MOF_RPlusSImm34
Definition: PPCISelLowering.h:689
llvm::PPCFunctionInfo::setVarArgsNumGPR
void setVarArgsNumGPR(unsigned Num)
Definition: PPCMachineFunctionInfo.h:226
llvm::SelectionDAG::getSplatBuildVector
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:807
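An illustrative sketch, assuming DAG and dl are in scope: building a v4i32 splat of the constant 7.
  SDValue Elt   = DAG.getConstant(7, dl, MVT::i32);
  SDValue Splat = DAG.getSplatBuildVector(MVT::v4i32, dl, Elt); // <7,7,7,7>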
llvm::EVT::getVectorVT
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
llvm::PPC::AM_DQForm
@ AM_DQForm
Definition: PPCISelLowering.h:714
llvm::codeview::EncodedFramePtrReg::None
@ None
llvm::CallBase::isMustTailCall
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Definition: Instructions.cpp:298
llvm::Function::hasFnAttribute
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:356
llvm::PPCTargetLowering::emitEHSjLjSetJmp
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
Definition: PPCISelLowering.cpp:11417
llvm::TargetLowering::CallLoweringInfo::CB
const CallBase * CB
Definition: TargetLowering.h:3755
llvm::TargetLoweringBase::setMinFunctionAlignment
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
Definition: TargetLowering.h:2288
llvm::TargetLowering::LowerCallTo
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
Definition: SelectionDAGBuilder.cpp:9407
llvm::SelectionDAG::isSplatValue
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0)
Test whether V has a splatted value for all the demanded elements.
Definition: SelectionDAG.cpp:2462
FPR
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments on Darwin and AIX.
Definition: PPCISelLowering.cpp:3817
llvm::PPCFunctionInfo
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
Definition: PPCMachineFunctionInfo.h:24
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
llvm::MemSDNode
This is an abstract virtual class for memory operations.
Definition: SelectionDAGNodes.h:1246
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::PPC::AM_DSForm
@ AM_DSForm
Definition: PPCISelLowering.h:713
llvm::PPCISD::DYNALLOC
@ DYNALLOC
The following two target-specific nodes are used for calls through function pointers in the 64-bit SV...
Definition: PPCISelLowering.h:144
llvm::PPCSubtarget::getTargetMachine
const PPCTargetMachine & getTargetMachine() const
Definition: PPCSubtarget.h:216
llvm::PPCSubtarget::needsTwoConstNR
bool needsTwoConstNR() const
Definition: PPCSubtarget.h:270
llvm::TargetLoweringBase::isJumpTableRelative
virtual bool isJumpTableRelative() const
Definition: TargetLoweringBase.cpp:2019
llvm::ISD::FMAXNUM_IEEE
@ FMAXNUM_IEEE
Definition: ISDOpcodes.h:906
fixupShuffleMaskForPermutedSToV
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl< int > &ShuffV, int LHSMaxIdx, int RHSMinIdx, int RHSMaxIdx, int HalfVec, unsigned ValidLaneWidth, const PPCSubtarget &Subtarget)
Definition: PPCISelLowering.cpp:14447
Results
Function Alias Analysis Results
Definition: AliasAnalysis.cpp:855
llvm::TargetLoweringBase::getVectorIdxTy
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
Definition: TargetLowering.h:391
llvm::PPCTargetLowering::EmitPartwordAtomicBinary
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
Definition: PPCISelLowering.cpp:11202
llvm::PPC::DIR_PWR6X
@ DIR_PWR6X
Definition: PPCSubtarget.h:59
llvm::SelectionDAG::getCommutedVectorShuffle
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
Definition: SelectionDAG.cpp:1950
llvm::ISD::MERGE_VALUES
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:236
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:892
getVectorCompareInfo
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
Definition: PPCISelLowering.cpp:10052
STLExtras.h
llvm::PPCFunctionInfo::VectorInt
@ VectorInt
Definition: PPCMachineFunctionInfo.h:32
llvm::ISD::VAEND
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1041
llvm::CallBase::arg_begin
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Definition: InstrTypes.h:1303
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:635
llvm::ISD::EXTLOAD
@ EXTLOAD
Definition: ISDOpcodes.h:1335
llvm::PPCISD::MFBHRBE
@ MFBHRBE
GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch history rolling buffer entry.
Definition: PPCISelLowering.h:417
llvm::SelectionDAG::getZExtOrTrunc
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
Definition: SelectionDAG.cpp:1275
llvm::ArrayType
Class to represent array types.
Definition: DerivedTypes.h:356
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:491
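A few illustrative values (note that 0 is rejected):
  llvm::isPowerOf2_32(16); // true
  llvm::isPowerOf2_32(24); // false
  llvm::isPowerOf2_32(0);  // false: only powers of two greater than 0 qualify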
llvm::PPCSubtarget::hasFSQRT
bool hasFSQRT() const
Definition: PPCSubtarget.h:255
llvm::minidump::MemoryType
MemoryType
Definition: Minidump.h:98
stripModuloOnShift
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
Definition: PPCISelLowering.cpp:16562
llvm::BlockAddressSDNode
Definition: SelectionDAGNodes.h:2114
llvm::PPCSubtarget::getStackPointerRegister
MCRegister getStackPointerRegister() const
Definition: PPCSubtarget.h:408
llvm::MVT::v2f64
@ v2f64
Definition: MachineValueType.h:172
llvm::Sched::Fast
@ Fast
Definition: TargetLowering.h:104
Format.h
llvm::PPCSubtarget::hasFRSQRTES
bool hasFRSQRTES() const
Definition: PPCSubtarget.h:259
llvm::PPCTargetLowering::emitLeadingFence
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
Definition: PPCISelLowering.cpp:10999
llvm::TargetLoweringBase::emitPatchPoint
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
Definition: TargetLoweringBase.cpp:1164
llvm::PPCTargetLowering::CallFlags::IsPatchPoint
const bool IsPatchPoint
Definition: PPCISelLowering.h:1125
llvm::PPC::MOF_SubtargetP10
@ MOF_SubtargetP10
Definition: PPCISelLowering.h:705
SelectionDAG.h
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::PPCTargetLowering::shouldExpandBuildVectorWithShuffles
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
Definition: PPCISelLowering.cpp:16410
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1567
llvm::SelectionDAG::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:442
llvm::PPCISD::LBRX
@ LBRX
GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a byte-swapping load instruction.
Definition: PPCISelLowering.h:507
DisableILPPref
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
llvm::PPCSubtarget::is64BitELFABI
bool is64BitELFABI() const
Definition: PPCSubtarget.h:349
Use.h
This defines the Use class.
llvm::PPCISD::FCFID
@ FCFID
FCFID - The FCFID instruction, taking an f64 operand and producing an f64 value containing the FP re...
Definition: PPCISelLowering.h:61
llvm::ISD::STRICT_FP_TO_UINT
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:429
llvm::PPCTargetLowering::expandVSXStoreForLE
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
Definition: PPCISelLowering.cpp:14271
llvm::ISD::SETUEQ
@ SETUEQ
Definition: ISDOpcodes.h:1366
llvm::PPC::isVPKUHUMShuffleMask
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
Definition: PPCISelLowering.cpp:1776
CalculateStackSlotSize
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
Definition: PPCISelLowering.cpp:3823
llvm::ISD::SMAX
@ SMAX
Definition: ISDOpcodes.h:627
llvm::SelectionDAG::getContext
LLVMContext * getContext() const
Definition: SelectionDAG.h:447
llvm::convertToNonDenormSingle
bool convertToNonDenormSingle(APInt &ArgAPInt)
Definition: PPCISelLowering.cpp:9048
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
llvm::ISD::FABS
@ FABS
Definition: ISDOpcodes.h:867
llvm::CC_PPC64_ELF_FIS
bool CC_PPC64_ELF_FIS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::PPCISD::BDZ
@ BDZ
Definition: PPCISelLowering.h:289
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:180
llvm::PPCTargetLowering::PPCTargetLowering
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
Definition: PPCISelLowering.cpp:143
llvm::ISD::BRCOND
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:956
llvm::ISD::EH_SJLJ_SETJMP
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition: ISDOpcodes.h:141
MachineRegisterInfo.h
llvm::PPCISD::ADDI_TLSLD_L
@ ADDI_TLSLD_L
x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction tha...
Definition: PPCISelLowering.h:376
KnownBits.h
llvm::TargetLoweringBase::getShiftAmountTy
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes=true) const
Definition: TargetLoweringBase.cpp:922
llvm::ShuffleVectorSDNode
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
Definition: SelectionDAGNodes.h:1484
llvm::LoopBase::block_end
block_iterator block_end() const
Definition: LoopInfo.h:177
llvm::PPC::MOF_SubtargetSPE
@ MOF_SubtargetSPE
Definition: PPCISelLowering.h:706
llvm::SelectionDAG::getRegister
SDValue getRegister(unsigned Reg, EVT VT)
Definition: SelectionDAG.cpp:1960
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
llvm::XCOFF::CsectProperties
Definition: XCOFF.h:409
llvm::MipsISD::Hi
@ Hi
Definition: MipsISelLowering.h:75
llvm::ISD::INLINEASM
@ INLINEASM
INLINEASM - Represents an inline asm block.
Definition: ISDOpcodes.h:980
llvm::EVT::isSimple
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:130
fixupFuncForFI
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
Definition: PPCISelLowering.cpp:2643
llvm::ISD::STRICT_FROUND
@ STRICT_FROUND
Definition: ISDOpcodes.h:414
llvm::GlobalValue::getSection
StringRef getSection() const
Definition: Globals.cpp:162
MachineValueType.h
llvm::MVT::SimpleValueType
SimpleValueType
Definition: MachineValueType.h:33
llvm::TargetLowering::getNegatedExpression
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
Definition: TargetLowering.cpp:6009
llvm::Reloc::Model
Model
Definition: CodeGen.h:22
llvm::ISD::ROTL
@ ROTL
Definition: ISDOpcodes.h:660
llvm::TargetLoweringObjectFile
Definition: TargetLoweringObjectFile.h:43
llvm::PPC::isXXBRWShuffleMask
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
Definition: PPCISelLowering.cpp:2322
PerfectShuffleTable
static const unsigned PerfectShuffleTable[6561+1]
Definition: AArch64PerfectShuffle.h:25
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::PPCISD::UINT_VEC_TO_FP
@ UINT_VEC_TO_FP
Extract a subvector from unsigned integer vector and convert to FP.
Definition: PPCISelLowering.h:240
llvm::SDNode::op_end
op_iterator op_end() const
Definition: SelectionDAGNodes.h:904
llvm::PPCISD::ANDI_rec_1_GT_BIT
@ ANDI_rec_1_GT_BIT
Definition: PPCISelLowering.h:255
llvm::PPCTargetLowering::CallFlags::HasNest
const bool HasNest
Definition: PPCISelLowering.h:1127
llvm::CC_PPC32_SVR4_ByVal
bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::ISD::EH_SJLJ_LONGJMP
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition: ISDOpcodes.h:147
Arg
Definition: AMDGPULibCalls.cpp:206
llvm::MVT::integer_valuetypes
static auto integer_valuetypes()
Definition: MachineValueType.h:1411
llvm::MachineBasicBlock::addSuccessor
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Definition: MachineBasicBlock.cpp:746
llvm::ISD::FFLOOR
@ FFLOOR
Definition: ISDOpcodes.h:885
llvm::PPC::DIR_440
@ DIR_440
Definition: PPCSubtarget.h:43
PPCSubtarget.h
CommandLine.h
llvm::ISD::LoadExtType
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1335
llvm::PPCISD::STXVD2X
@ STXVD2X
CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
Definition: PPCISelLowering.h:560
llvm::PPCII::MO_PCREL_FLAG
@ MO_PCREL_FLAG
MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to the current instruction addre...
Definition: PPC.h:109
llvm::MachineInstrBuilder::addDef
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Definition: MachineInstrBuilder.h:116
getCallOpcode
static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags, const Function &Caller, const SDValue &Callee, const PPCSubtarget &Subtarget, const TargetMachine &TM)
Definition: PPCISelLowering.cpp:5176
llvm::ISD::STRICT_FDIV
@ STRICT_FDIV
Definition: ISDOpcodes.h:390
llvm::ISD::BR_CC
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:963
TargetLowering.h
This file describes how to lower LLVM code to machine code.
llvm::PPCSubtarget::hasP8Altivec
bool hasP8Altivec() const
Definition: PPCSubtarget.h:272
x3
Definition: README-SSE.txt:547
llvm::BlockAddressSDNode::getOffset
int64_t getOffset() const
Definition: SelectionDAGNodes.h:2128
llvm::ISD::FSHR
@ FSHR
Definition: ISDOpcodes.h:663
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::SelectionDAG::getLoad
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
Definition: SelectionDAG.cpp:7426
llvm::PPCISD::STRICT_FCTIDUZ
@ STRICT_FCTIDUZ
Definition: PPCISelLowering.h:485
llvm::PPCTargetLowering::LowerOperation
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
Definition: PPCISelLowering.cpp:10823
llvm::PPCISD::STFIWX
@ STFIWX
STFIWX - The STFIWX instruction.
Definition: PPCISelLowering.h:511
llvm::LoopBase::getSubLoops
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
Definition: LoopInfo.h:143
R2
#define R2(n)
llvm::PPC::MOF_RPlusLo
@ MOF_RPlusLo
Definition: PPCISelLowering.h:686
llvm::MVT::i1
@ i1
Definition: MachineValueType.h:43
llvm::SDNode::getOpcode
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
Definition: SelectionDAGNodes.h:621
llvm::EVT::getStoreSize
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:363
llvm::TargetFrameLowering::getStackAlignment
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
Definition: TargetFrameLowering.h:95
llvm::TargetLowering::CallLoweringInfo::IsVarArg
bool IsVarArg
Definition: TargetLowering.h:3733
llvm::MinAlign
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:672
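A small worked example: MinAlign yields the alignment still guaranteed after combining an alignment with an offset.
  llvm::MinAlign(16, 4);  // 4: a 4-byte offset from a 16-byte-aligned base is only 4-byte aligned
  llvm::MinAlign(16, 16); // 16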
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:632
llvm::ISD::STRICT_FP_TO_SINT
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:428
llvm::CCValAssign::AExt
@ AExt
Definition: CallingConvLower.h:39
llvm::PPCISD::CR6SET
@ CR6SET
ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
Definition: PPCISelLowering.h:307
llvm::APInt::isNegative
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:364
llvm::TargetLoweringBase::isOperationLegalOrCustom
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
Definition: TargetLowering.h:1113
llvm::ISD::GlobalAddress
@ GlobalAddress
Definition: ISDOpcodes.h:78
llvm::TargetOptions::NoInfsFPMath
unsigned NoInfsFPMath
NoInfsFPMath - This flag is enabled when the -enable-no-infs-fp-math flag is specified on the command...
Definition: TargetOptions.h:156
EnsureStackAlignment
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
Definition: PPCISelLowering.cpp:3929
llvm::ISD::SELECT_CC
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:694
llvm::ExternalSymbolSDNode
Definition: SelectionDAGNodes.h:2156
llvm::PPCTargetLowering::shouldConvertConstantLoadToIntImm
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
Definition: PPCISelLowering.cpp:16189
llvm::PPCISD::TC_RETURN
@ TC_RETURN
TC_RETURN - A tail call return.
Definition: PPCISelLowering.h:304
llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition: TargetInstrInfo.h:97
GlobalValue.h
llvm::PPCSubtarget::hasFPRND
bool hasFPRND() const
Definition: PPCSubtarget.h:263
llvm::PPCISD::STRICT_FCFIDUS
@ STRICT_FCFIDUS
Definition: PPCISelLowering.h:492
MachineLoopInfo.h
haveEfficientBuildVectorPattern
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
Definition: PPCISelLowering.cpp:8957
llvm::SelectionDAG::getTargetFrameIndex
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:688
llvm::SDValue::getValueType
EVT getValueType() const
Return the ValueType of the referenced return value.
Definition: SelectionDAGNodes.h:1113
llvm::APInt::setBit
void setBit(unsigned BitPosition)
Set a given bit to 1.
Definition: APInt.h:1442
llvm::PPCTargetLowering::preferIncOfAddToSubOfNot
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) and add (add x, 1), y. The variant with two add's is IR...
Definition: PPCISelLowering.cpp:1571
llvm::MemIntrinsicSDNode
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
Definition: SelectionDAGNodes.h:1458
CalculateStackSlotUsed
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
Definition: PPCISelLowering.cpp:3881
llvm::CCValAssign
CCValAssign - Represent assignment of one arg/retval to a location.
Definition: CallingConvLower.h:33
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
llvm::ISD::CTLZ
@ CTLZ
Definition: ISDOpcodes.h:668
llvm::MachineInstrBuilder::addMBB
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:146
isFunctionGlobalAddress
static bool isFunctionGlobalAddress(SDValue Callee)
Definition: PPCISelLowering.cpp:5073
llvm::PPCSubtarget::hasFRES
bool hasFRES() const
Definition: PPCSubtarget.h:257
CalculateStackSlotAlignment
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
Definition: PPCISelLowering.cpp:3839
llvm::PPCISD::SC
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
Definition: PPCISelLowering.h:410
llvm::SelectionDAG
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:216
SelectionDAGNodes.h
llvm::ISD::SELECT
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:679
Constants.h
This file contains the declarations for the subclasses of Constant, which represent the different fla...
llvm::SelectionDAG::UpdateNodeOperands
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
Definition: SelectionDAG.cpp:8237
llvm::PPCFunctionInfo::setReturnAddrSaveIndex
void setReturnAddrSaveIndex(int idx)
Definition: PPCMachineFunctionInfo.h:160
llvm::SDNode::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this node.
Definition: SelectionDAGNodes.h:692
llvm::PPCISD::GlobalBaseReg
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
Definition: PPCISelLowering.h:156
llvm::ISD::STRICT_FRINT
@ STRICT_FRINT
Definition: ISDOpcodes.h:408
llvm::PowerOf2Floor
uint64_t PowerOf2Floor(uint64_t A)
Returns the power of two which is less than or equal to the given value.
Definition: MathExtras.h:695
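Illustrative values:
  llvm::PowerOf2Floor(20); // 16
  llvm::PowerOf2Floor(16); // 16
  llvm::PowerOf2Floor(0);  // 0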
llvm::ISD::ZERO_EXTEND
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:729
llvm::PPCTargetLowering::isFPImmLegal
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Definition: PPCISelLowering.cpp:16538
llvm::SelectionDAG::getBoolExtOrTrunc
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
Definition: SelectionDAG.cpp:1281
llvm::PPCSubtarget
Definition: PPCSubtarget.h:71
llvm::ISD::ArgFlagsTy::isByVal
bool isByVal() const
Definition: TargetCallingConv.h:85
llvm::MachineOperand::CreateImm
static MachineOperand CreateImm(int64_t Val)
Definition: MachineOperand.h:773
llvm::SelectionDAG::getTargetBlockAddress
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:730
llvm::ISD::ABS
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:640
llvm::PPCTargetLowering::hasSPE
bool hasSPE() const
Definition: PPCISelLowering.cpp:1567
llvm::PPCSubtarget::getTOCPointerRegister
MCRegister getTOCPointerRegister() const
Definition: PPCSubtarget.h:402
llvm::ISD::SETGE
@ SETGE
Definition: ISDOpcodes.h:1377
llvm::SelectionDAG::getTruncStore
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Definition: SelectionDAG.cpp:7527
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::PPCISD::LFIWZX
@ LFIWZX
GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point load which zero-extends from a 32-bit inte...
Definition: PPCISelLowering.h:521
llvm::CCValAssign::getLocReg
Register getLocReg() const
Definition: CallingConvLower.h:150
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:720
llvm::PPCFrameLowering
Definition: PPCFrameLowering.h:22
llvm::SelectionDAG::getObjectPtrOffset
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
Definition: SelectionDAG.h:906
llvm::PPC::MOF_RPlusR
@ MOF_RPlusR
Definition: PPCISelLowering.h:690
llvm::User
Definition: User.h:44
llvm::SelectionDAG::getUNDEF
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:951
Intr
unsigned Intr
Definition: AMDGPUBaseInfo.cpp:1987
llvm::PPC::MOF_ZExt
@ MOF_ZExt
Definition: PPCISelLowering.h:680
llvm::ISD::SIGN_EXTEND_INREG
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:747
llvm::SelectionDAG::getTargetLoweringInfo
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:443
llvm::ISD::STRICT_FNEARBYINT
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:409
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:35
Intrinsics.h
llvm::TargetLoweringBase::AddrMode::HasBaseReg
bool HasBaseReg
Definition: TargetLowering.h:2352
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::MVT::f64
@ f64
Definition: MachineValueType.h:56
llvm::SelectionDAG::getConstant
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
Definition: SelectionDAG.cpp:1348
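A minimal sketch, assuming DAG and dl are in scope, showing a plain constant versus a target constant:
  SDValue Zero = DAG.getConstant(0, dl, MVT::i64);                     // ISD::Constant
  SDValue Four = DAG.getConstant(4, dl, MVT::i32, /*isTarget=*/true);  // ISD::TargetConstant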
isSplat
static bool isSplat(ArrayRef< Value * > VL)
Definition: SLPVectorizer.cpp:236
llvm::PPCISD::XXSPLTI_SP_TO_DP
@ XXSPLTI_SP_TO_DP
XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for converting immediate single prec...
Definition: PPCISelLowering.h:109
llvm::JumpTableSDNode
Definition: SelectionDAGNodes.h:1803
llvm::EVT::getVectorNumElements
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:309
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1393
int
Definition: README.txt:536
llvm::GlobalObject
Definition: GlobalObject.h:28
llvm::TargetLowering
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Definition: TargetLowering.h:3170
StoreTailCallArgumentsToStackSlot
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
Definition: PPCISelLowering.cpp:4935
llvm::PPCTargetLowering::ReplaceNodeResults
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
Definition: PPCISelLowering.cpp:10907
llvm::KnownBits::One
APInt One
Definition: KnownBits.h:25
llvm::MVT::getScalarSizeInBits
uint64_t getScalarSizeInBits() const
Definition: MachineValueType.h:1062
BuildVSLDOI
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
Definition: PPCISelLowering.cpp:8932
MCContext.h
CalculateTailCallSPDiff
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
Definition: PPCISelLowering.cpp:4601
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
llvm::PPCISD::VCMP_rec
@ VCMP_rec
RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the altivec VCMP*_rec instructions.
Definition: PPCISelLowering.h:277
llvm::SelectionDAG::MaskedValueIsZero
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
Definition: SelectionDAG.cpp:2438
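A hedged sketch, assuming DAG and a 64-bit SDValue Op are in scope: proving the low two bits of Op are zero.
  llvm::APInt LowTwo = llvm::APInt::getLowBitsSet(64, 2);
  if (DAG.MaskedValueIsZero(Op, LowTwo)) {
    // Op is known to be a multiple of 4 here.
  }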
llvm::PPCTargetLowering::CallFlags::CallConv
const CallingConv::ID CallConv
Definition: PPCISelLowering.h:1122
llvm::PPCSubtarget::isISA3_1
bool isISA3_1() const
Definition: PPCSubtarget.h:327
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::ISD::FROUND
@ FROUND
Definition: ISDOpcodes.h:883
isXXBRShuffleMaskHelper
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
Definition: PPCISelLowering.cpp:2305
TargetMachine.h
llvm::PPCTargetLowering::isLegalAddressingMode
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
Definition: PPCISelLowering.cpp:15900
llvm::TargetLowering::DAGCombinerInfo
Definition: TargetLowering.h:3525
llvm::PPCTargetLowering::CallFlags
Structure that collects some common arguments that get passed around between the functions for call l...
Definition: PPCISelLowering.h:1121
llvm::PPCFunctionInfo::setVarArgsNumFPR
void setVarArgsNumFPR(unsigned Num)
Definition: PPCMachineFunctionInfo.h:240
Param
Value * Param
Definition: NVPTXLowerArgs.cpp:163
llvm::ISD::TRUNCATE
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:735
llvm::MVT::SimpleTy
SimpleValueType SimpleTy
Definition: MachineValueType.h:321
PPCCCState.h
llvm::ISD::SRA
@ SRA
Definition: ISDOpcodes.h:658
llvm::CallingConv::C
@ C
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
llvm::TargetLoweringBase::ZeroOrNegativeOneBooleanContent
@ ZeroOrNegativeOneBooleanContent
Definition: TargetLowering.h:233
llvm::ISD::FMINNUM_IEEE
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimum or maximum on two values,...
Definition: ISDOpcodes.h:905
llvm::ISD::LLROUND
@ LLROUND
Definition: ISDOpcodes.h:887
llvm::PPCFunctionInfo::setUsesTOCBasePtr
void setUsesTOCBasePtr()
Definition: PPCMachineFunctionInfo.h:213
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:237
llvm::PPCSubtarget::hasFPCVT
bool hasFPCVT() const
Definition: PPCSubtarget.h:264
llvm::PPCSubtarget::isISA3_0
bool isISA3_0() const
Definition: PPCSubtarget.h:326
llvm::PPCISD::ATOMIC_CMP_SWAP_8
@ ATOMIC_CMP_SWAP_8
ATOMIC_CMP_SWAP - the exact same as the target-independent nodes except they ensure that the compare ...
Definition: PPCISelLowering.h:573
llvm::PPCISD::VADD_SPLAT
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
Definition: PPCISelLowering.h:406
ANDIGlueBug
cl::opt< bool > ANDIGlueBug
llvm::ISD::UDIVREM
@ UDIVREM
Definition: ISDOpcodes.h:256
llvm::MachinePointerInfo::getGOT
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
Definition: MachineOperand.cpp:1012
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
callIntrinsic
static Instruction * callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id)
Definition: PPCISelLowering.cpp:10991
llvm::MCID::Return
@ Return
Definition: MCInstrDesc.h:151
llvm::ISD::DELETED_NODE
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
llvm::TargetLoweringBase::addRegisterClass
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
Definition: TargetLowering.h:2168
llvm::TypeSize::Fixed
static TypeSize Fixed(ScalarTy MinVal)
Definition: TypeSize.h:423
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:109
llvm::TargetLoweringBase::MaxLoadsPerMemcmpOptSize
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
Definition: TargetLowering.h:3129
llvm::PPCSubtarget::hasEFPU2
bool hasEFPU2() const
Definition: PPCSubtarget.h:267
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:49
llvm::CodeModel::Small
@ Small
Definition: CodeGen.h:28
llvm::ISD::NodeType
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
llvm::PPCFunctionInfo::setHasNonRISpills
void setHasNonRISpills()
Definition: PPCMachineFunctionInfo.h:201
PPCFrameLowering.h
llvm::PPC::MOF_Vector
@ MOF_Vector
Definition: PPCISelLowering.h:699
llvm::PPCTargetLowering::getConstraintType
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
Definition: PPCISelLowering.cpp:15604
llvm::PPCTargetLowering::getNegatedExpression
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
Definition: PPCISelLowering.cpp:16448
llvm::M0
unsigned M0(unsigned Val)
Definition: VE.h:371
llvm::EVT::isInteger
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:145
llvm::Instruction
Definition: Instruction.h:46
llvm::PPCTargetLowering::emitMaskedAtomicRMWIntrinsic
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
Definition: PPCISelLowering.cpp:17537
llvm::MCID::Flag
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:146
llvm::DataLayout::getABITypeAlign
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:825
llvm::PPCSubtarget::descriptorTOCAnchorOffset
unsigned descriptorTOCAnchorOffset() const
Definition: PPCSubtarget.h:384
llvm::ShuffleVectorSDNode::getMask
ArrayRef< int > getMask() const
Definition: SelectionDAGNodes.h:1496
llvm::TargetLoweringBase::setIndexedStoreAction
void setIndexedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
Definition: TargetLowering.h:2225
llvm::PPCTargetLowering::isProfitableToHoist
bool isProfitableToHoist(Instruction *I) const override
isProfitableToHoist - Check if it is profitable to hoist instruction I to its dominator block.
Definition: PPCISelLowering.cpp:16335
getEstimateRefinementSteps
static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget)
Definition: PPCISelLowering.cpp:12662
isSplatBV
static bool isSplatBV(SDValue Op)
Definition: PPCISelLowering.cpp:14409
llvm::ISD::SINT_TO_FP
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:739
llvm::PPCSubtarget::isPPC64
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
Definition: PPCSubtarget.cpp:243
llvm::PPC::DIR_PWR4
@ DIR_PWR4
Definition: PPCSubtarget.h:55
llvm::report_fatal_error
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1631
llvm::AtomicRMWInst::Nand
@ Nand
*p = ~(old & v)
Definition: Instructions.h:748
Options
LLVMPassBuilderOptionsRef Options
Definition: PassBuilderBindings.cpp:48
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::PPC::PRED_BIT_SET
@ PRED_BIT_SET
Definition: PPCPredicates.h:57
llvm::PPCISD::SRA_ADDZE
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implemented signed integer division by a power of 2.
Definition: PPCISelLowering.h:180
llvm::ConstantFP
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:257
llvm::PPCISD::MFVSR
@ MFVSR
Direct move from a VSX register to a GPR.
Definition: PPCISelLowering.h:212
llvm::CCValAssign::getCustomMem
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, unsigned Offset, MVT LocVT, LocInfo HTP)
Definition: CallingConvLower.h:116
llvm::TargetLoweringBase::getSDagStackGuard
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
Definition: TargetLoweringBase.cpp:1991
llvm::APInt::getHighBitsSet
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
Definition: APInt.h:655
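An illustrative value:
  llvm::APInt TopByte = llvm::APInt::getHighBitsSet(32, 8); // 0xFF000000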
APFloat.h
This file declares a class to represent arbitrary precision floating point values and provide a varie...
llvm::DataLayout::getLargestLegalIntTypeSizeInBits
unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of the largest legal integer type, or 0 if none are set.
Definition: DataLayout.cpp:860
llvm::ISD::FNEARBYINT
@ FNEARBYINT
Definition: ISDOpcodes.h:882
PPC.h
llvm::PPC::DIR_PWR9
@ DIR_PWR9
Definition: PPCSubtarget.h:62
llvm::ISD::FRINT
@ FRINT
Definition: ISDOpcodes.h:881
llvm::ISD::FP16_TO_FP
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:858
llvm::ISD::ATOMIC_STORE
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1118
llvm::FrameIndexSDNode
Definition: SelectionDAGNodes.h:1726
llvm::CallingConv::Fast
@ Fast
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
ErrorHandling.h
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:901
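An illustrative sketch, assuming an LLVMContext Ctx and the usual IR headers; with a vector type the same call returns a splat constant.
  llvm::Type *I32 = llvm::Type::getInt32Ty(Ctx);
  llvm::Constant *FortyTwo = llvm::ConstantInt::get(I32, 42);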
llvm::PPCTargetLowering::SelectAddressPCRel
bool SelectAddressPCRel(SDValue N, SDValue &Base) const
SelectAddressPCRel - Represent the specified address as a PC-relative address of the form [pc+imm].
Definition: PPCISelLowering.cpp:2870
UseAbsoluteJumpTables
static cl::opt< bool > UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden)
llvm::SelectionDAG::getMemIntrinsicNode
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, uint64_t Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
Definition: SelectionDAG.cpp:7203
llvm::InlineAsm::Kind_Clobber
@ Kind_Clobber
Definition: InlineAsm.h:236
llvm::MVT::INVALID_SIMPLE_VALUE_TYPE
@ INVALID_SIMPLE_VALUE_TYPE
Definition: MachineValueType.h:38
DebugLoc.h
llvm::ms_demangle::QualifierMangleMode::Drop
@ Drop
llvm::PPCISD::PPC32_GOT
@ PPC32_GOT
GPRC = address of GLOBAL_OFFSET_TABLE.
Definition: PPCISelLowering.h:312
SmallPtrSet.h
llvm::PPC::isVSLDOIShuffleMask
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1.
Definition: PPCISelLowering.cpp:2056
llvm::MachineFrameInfo::getObjectOffset
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
Definition: MachineFrameInfo.h:492
llvm::PPC::MOF_DoubleWordInt
@ MOF_DoubleWordInt
Definition: PPCISelLowering.h:697
llvm::PPCISD::XXSWAPD
@ XXSWAPD
VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little endian.
Definition: PPCISelLowering.h:427
llvm::TargetLoweringBase::MaxStoresPerMemcpy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
Definition: TargetLowering.h:3106
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
BuildIntrinsicOp
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID.
Definition: PPCISelLowering.cpp:8903
llvm::PPCTargetLowering::CallFlags::IsIndirect
const bool IsIndirect
Definition: PPCISelLowering.h:1126
llvm::MCSymbol::getName
StringRef getName() const
getName - Get the symbol name.
Definition: MCSymbol.h:198
llvm::PPCSubtarget::hasPartwordAtomics
bool hasPartwordAtomics() const
Definition: PPCSubtarget.h:305
llvm::ISD::AND
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:632
llvm::PPCISD::ADDIS_GOT_TPREL_HA
@ ADDIS_GOT_TPREL_HA
G8RC = ADDIS_GOT_TPREL_HA x2, Symbol - Used by the initial-exec TLS model, produces an ADDIS8 instruc...
Definition: PPCISelLowering.h:321
llvm::CCValAssign::getLocInfo
LocInfo getLocInfo() const
Definition: CallingConvLower.h:155
llvm::PPC::DIR_E500
@ DIR_E500
Definition: PPCSubtarget.h:51
llvm::ISD::SETOLT
@ SETOLT
Definition: ISDOpcodes.h:1361
llvm::ISD::TargetGlobalAddress
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:164
Align
uint64_t Align
Definition: ELFObjHandler.cpp:83
llvm::PPCSubtarget::getInstrInfo
const PPCInstrInfo * getInstrInfo() const override
Definition: PPCSubtarget.h:206
llvm::TargetLoweringBase::insertSSPDeclarations
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
Definition: TargetLoweringBase.cpp:1978
llvm::GlobalAddressSDNode::getGlobal
const GlobalValue * getGlobal() const
Definition: SelectionDAGNodes.h:1712
llvm::APFloat::bitcastToAPInt
APInt bitcastToAPInt() const
Definition: APFloat.h:1132
llvm::ISD::FSINCOS
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:915
llvm::PPC::PRED_LE
@ PRED_LE
Definition: PPCPredicates.h:28
llvm::CCValAssign::getLocMemOffset
unsigned getLocMemOffset() const
Definition: CallingConvLower.h:151
llvm::TargetLoweringBase::isOperationLegal
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
Definition: TargetLowering.h:1217
llvm::CallInst::isTailCall
bool isTailCall() const
Definition: Instructions.h:1669
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Register
Promote Memory to Register
Definition: Mem2Reg.cpp:110
llvm::TargetLoweringBase::setBooleanVectorContents
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
Definition: TargetLowering.h:2116
llvm::PPCFrameLowering::getFramePointerSaveOffset
unsigned getFramePointerSaveOffset() const
getFramePointerSaveOffset - Return the previous frame offset to save the frame pointer.
Definition: PPCFrameLowering.cpp:2687
llvm::AtomicRMWInst::Xchg
@ Xchg
*p = v
Definition: Instructions.h:740
llvm::SDValue::getConstantOperandVal
uint64_t getConstantOperandVal(unsigned i) const
Definition: SelectionDAGNodes.h:1125
llvm::EVT::changeVectorElementTypeToInteger
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
llvm::CallBase::getCallingConv
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1449
llvm::MachineRegisterInfo::getVRegDef
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Definition: MachineRegisterInfo.cpp:400
llvm::array_lengthof
constexpr size_t array_lengthof(T(&)[N])
Find the length of an array.
Definition: STLExtras.h:1377
MCSectionXCOFF.h
llvm::Attribute::getValueAsString
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:301
llvm::PPCSubtarget::hasFCPSGN
bool hasFCPSGN() const
Definition: PPCSubtarget.h:254
isSignExtended
static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII)
Definition: PPCISelLowering.cpp:11147
llvm::PPCISD::SRL
@ SRL
These nodes represent PPC shifts.
Definition: PPCISelLowering.h:165
llvm::StringRef::getAsInteger
std::enable_if_t< std::numeric_limits< T >::is_signed, bool > getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:510
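A small illustrative example (the string and variable are made up); note the inverted convention: the call returns true on parse failure and false on success:

    #include "llvm/ADT/StringRef.h"

    int Value = 0;
    bool Failed = llvm::StringRef("-42").getAsInteger(/*Radix=*/10, Value);
    // Failed == false and Value == -42; a radix of 0 auto-detects 0x/0b/0 prefixes.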
llvm::None
const NoneType None
Definition: None.h:23
llvm::AtomicRMWInst::Add
@ Add
*p = old + v
Definition: Instructions.h:742
prepareIndirectCall
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, const SDLoc &dl)
Definition: PPCISelLowering.cpp:5312
llvm::SelectionDAG::getEVTAlign
Align getEVTAlign(EVT MemoryVT) const
Compute the default alignment value for the given type.
Definition: SelectionDAG.cpp:1124
llvm::MVT::v4i16
@ v4i16
Definition: MachineValueType.h:91
llvm::EVT::getTypeForEVT
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:181
convertIntToFP
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, const PPCSubtarget &Subtarget, SDValue Chain=SDValue())
Definition: PPCISelLowering.cpp:8307
llvm::CCValAssign::isRegLoc
bool isRegLoc() const
Definition: CallingConvLower.h:145
llvm::MVT::v4i8
@ v4i8
Definition: MachineValueType.h:78
llvm::MachineInstr::NoFPExcept
@ NoFPExcept
Definition: MachineInstr.h:108
llvm::CallingConv::ID
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
llvm::InlineAsm::getKind
static unsigned getKind(unsigned Flags)
Definition: InlineAsm.h:328
llvm::PPC::DIR_PWR_FUTURE
@ DIR_PWR_FUTURE
Definition: PPCSubtarget.h:64
llvm::RISCVISD::DIVW
@ DIVW
Definition: RISCVISelLowering.h:54
llvm::PPCISD::MTVSRZ
@ MTVSRZ
Direct move from a GPR to a VSX register (zero)
Definition: PPCISelLowering.h:218
Type.h
llvm::ISD::SETOLE
@ SETOLE
Definition: ISDOpcodes.h:1362
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
BranchProbability.h
llvm::PPCFrameLowering::getTOCSaveOffset
unsigned getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
Definition: PPCFrameLowering.cpp:2683
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:96
llvm::TargetLoweringObjectFile::getFunctionEntryPointSymbol
virtual MCSymbol * getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const
If supported, return the function entry point symbol.
Definition: TargetLoweringObjectFile.h:272
llvm::ISD::SETUGT
@ SETUGT
Definition: ISDOpcodes.h:1367
llvm::PPCTargetLowering::SelectOptimalAddrMode
PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign Align) const
SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode), compute the address flags of...
Definition: PPCISelLowering.cpp:17382
llvm::PPCISD::LFIWAX
@ LFIWAX
GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point load which sign-extends from a 32-bit inte...
Definition: PPCISelLowering.h:516
llvm::PPCISD::STORE_VEC_BE
@ STORE_VEC_BE
CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
Definition: PPCISelLowering.h:565
provablyDisjointOr
static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N)
Used when computing address flags for selecting loads and stores.
Definition: PPCISelLowering.cpp:2539
llvm::EVT::isExtended
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:135
llvm::GlobalIndirectSymbol::getBaseObject
const GlobalObject * getBaseObject() const
Definition: Globals.cpp:467
llvm::SelectionDAG::getTargetGlobalAddress
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:683
llvm::MachineJumpTableInfo::EK_LabelDifference32
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
Definition: MachineJumpTableInfo.h:68
llvm::PPCSubtarget::isGVIndirectSymbol
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
Definition: PPCSubtarget.cpp:233
llvm::PPCTargetLowering::getSDagStackGuard
Value * getSDagStackGuard(const Module &M) const override
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
Definition: PPCISelLowering.cpp:16532
llvm::PPCISD::FP_TO_SINT_IN_VSR
@ FP_TO_SINT_IN_VSR
Definition: PPCISelLowering.h:82
llvm::PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
Definition: PPCISelLowering.cpp:17562
llvm::MachineInstrBuilder::cloneMemRefs
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
Definition: MachineInstrBuilder.h:213
llvm::PPCTargetLowering::getPrefLoopAlignment
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
Definition: PPCISelLowering.cpp:15553
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:634
llvm::MVT::isFloatingPoint
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: MachineValueType.h:340
llvm::PPCII::MO_GOT_TPREL_PCREL_FLAG
@ MO_GOT_TPREL_PCREL_FLAG
MO_GOT_TPREL_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition: PPC.h:150
llvm::Instruction::hasAtomicLoad
bool hasAtomicLoad() const
Return true if this atomic instruction loads from memory.
Definition: Instruction.cpp:619
llvm::CCAssignFn
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
Definition: CallingConvLower.h:177
llvm::PPCISD::XXMFACC
@ XXMFACC
XXMFACC = This corresponds to the xxmfacc instruction.
Definition: PPCISelLowering.h:480
llvm::ARM_PROC::IE
@ IE
Definition: ARMBaseInfo.h:27
llvm::LoopBase::block_begin
block_iterator block_begin() const
Definition: LoopInfo.h:176
llvm::PPCISD::FCTIWZ
@ FCTIWZ
Definition: PPCISelLowering.h:73
llvm::CodeModel::Model
Model
Definition: CodeGen.h:28
llvm::ISD::FPOW
@ FPOW
Definition: ISDOpcodes.h:873
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:201
llvm::TargetLoweringBase::PredictableSelectIsExpensive
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
Definition: TargetLowering.h:3147
llvm::ISD::FADD
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:377
llvm::PPCFunctionInfo::isLRStoreRequired
bool isLRStoreRequired() const
Definition: PPCMachineFunctionInfo.h:211
llvm::MVT::v2i8
@ v2i8
Definition: MachineValueType.h:77
llvm::ISD::BlockAddress
@ BlockAddress
Definition: ISDOpcodes.h:84
llvm::PPC::PRED_EQ
@ PRED_EQ
Definition: PPCPredicates.h:29
llvm::TargetLowering::CallLoweringInfo::Outs
SmallVector< ISD::OutputArg, 32 > Outs
Definition: TargetLowering.h:3756
llvm::MachineFunction::getMMI
MachineModuleInfo & getMMI() const
Definition: MachineFunction.h:573
llvm::PPC::MOF_None
@ MOF_None
Definition: PPCISelLowering.h:676
llvm::TargetLowering::getPICJumpTableRelocBaseExpr
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
Definition: TargetLowering.cpp:463
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:388
G
const DataFlowGraph & G
Definition: RDFGraph.cpp:202
llvm::ISD::STRICT_FSETCCS
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:463
llvm::PPCTargetLowering::hasInlineStackProbe
bool hasInlineStackProbe(MachineFunction &MF) const override
Definition: PPCISelLowering.cpp:11660
llvm::PPCSubtarget::isAIXABI
bool isAIXABI() const
Definition: PPCSubtarget.h:345
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:622
llvm::PPCSubtarget::hasLDBRX
bool hasLDBRX() const
Definition: PPCSubtarget.h:288
llvm::tgtok::In
@ In
Definition: TGLexer.h:51
llvm::MachineInstrBuilder::addFrameIndex
const MachineInstrBuilder & addFrameIndex(int Idx) const
Definition: MachineInstrBuilder.h:152
llvm::TargetLoweringBase::setOperationAction
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
Definition: TargetLowering.h:2185
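An illustrative sketch of how a target's TargetLowering constructor typically uses this hook; the specific operation/type choices below are examples, not a quote of the PPC configuration:

    // Inside a TargetLowering subclass constructor:
    setOperationAction(ISD::SREM, MVT::i32, Expand);       // legalizer rewrites it into other nodes
    setOperationAction(ISD::VASTART, MVT::Other, Custom);  // routed to the target's LowerOperation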
llvm::PPCSubtarget::isSVR4ABI
bool isSVR4ABI() const
Definition: PPCSubtarget.h:346
llvm::AtomicOrdering
AtomicOrdering
Atomic ordering for LLVM's memory model.
Definition: AtomicOrdering.h:56
llvm::TargetLoweringBase::MaxStoresPerMemcpyOptSize
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
Definition: TargetLowering.h:3108
llvm::PPCTargetLowering::SelectAddressRegRegOnly
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified address, force it to be represented as an indexed [r+...
Definition: PPCISelLowering.cpp:2832
llvm::PPCISD::CALL_NOP
@ CALL_NOP
Definition: PPCISelLowering.h:187
llvm::MachineInstrBuilder::setMIFlag
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
Definition: MachineInstrBuilder.h:278
llvm::TargetLowering::softenSetCCOperands
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
Definition: TargetLowering.cpp:283
llvm::ISD::SMIN
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:626
llvm::PPC::isXXBRHShuffleMask
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
Definition: PPCISelLowering.cpp:2318
llvm::PPCISD::ADD_TLS
@ ADD_TLS
G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS model, produces an ADD instruction that ...
Definition: PPCISelLowering.h:335
llvm::cl::opt< bool >
llvm::APFloat
Definition: APFloat.h:701
llvm::SDNode::use_begin
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
Definition: SelectionDAGNodes.h:775
llvm::NVPTX::PTXLdStInstCode::V4
@ V4
Definition: NVPTX.h:124
llvm::CCValAssign::LocInfo
LocInfo
Definition: CallingConvLower.h:35
llvm::ISD::Register
@ Register
Definition: ISDOpcodes.h:74
llvm::ISD::GET_DYNAMIC_AREA_OFFSET
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
Definition: ISDOpcodes.h:1193
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:179
llvm::SDValue::getNumOperands
unsigned getNumOperands() const
Definition: SelectionDAGNodes.h:1117
llvm::PPCFrameLowering::getLinkageSize
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
Definition: PPCFrameLowering.h:165
llvm::PPC::Predicate
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
llvm::PPCSubtarget::isTargetLinux
bool isTargetLinux() const
Definition: PPCSubtarget.h:343
llvm::peekThroughBitcasts
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
Definition: SelectionDAG.cpp:9547
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::SignExtend32
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition: MathExtras.h:761
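A worked example, assuming the templated form where the bit count B is a template argument:

    #include "llvm/Support/MathExtras.h"

    // Interpret the low 16 bits as a signed value and widen to 32 bits.
    static_assert(llvm::SignExtend32<16>(0xFFFFu) == -1, "all-ones low half is -1");
    static_assert(llvm::SignExtend32<16>(0x7FFFu) == 32767, "sign bit clear, value unchanged");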
llvm::PPCTargetLowering::getPICJumpTableRelocBaseExpr
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
Definition: PPCISelLowering.cpp:3145
llvm::PPCTargetLowering::getExceptionSelectorRegister
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
Definition: PPCISelLowering.cpp:16404
llvm::MachineLoop
Definition: MachineLoopInfo.h:45
llvm::MipsISD::Ext
@ Ext
Definition: MipsISelLowering.h:156
VI
@ VI
Definition: SIInstrInfo.cpp:7542
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::SmallSet::count
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:164
llvm::PPCII::MO_TPREL_HA
@ MO_TPREL_HA
Definition: PPC.h:160
llvm::MVT::v16i8
@ v16i8
Definition: MachineValueType.h:80
llvm::isInt< 32 >
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:373
llvm::ISD::FLOG10
@ FLOG10
Definition: ISDOpcodes.h:876
llvm::EVT::getSizeInBits
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:341
AIXSSPCanaryWordName
static const char AIXSSPCanaryWordName[]
Definition: PPCISelLowering.cpp:138
llvm::CCValAssign::SExt
@ SExt
Definition: CallingConvLower.h:37
llvm::isUInt< 16 >
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:408
llvm::AtomicRMWInst::Sub
@ Sub
*p = old - v
Definition: Instructions.h:744
findConsecutiveLoad
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
Definition: PPCISelLowering.cpp:12903
llvm::PPCII::MO_TLSGDM_FLAG
@ MO_TLSGDM_FLAG
MO_TLSGDM_FLAG - If this bit is set the symbol reference is relative to the region handle of TLS Gene...
Definition: PPC.h:135
llvm::count
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1617
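A short illustrative use (the container contents are made up):

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ADT/SmallVector.h"

    llvm::SmallVector<int, 8> Mask = {0, 1, 0, 3};
    auto Zeros = llvm::count(Mask, 0); // 2 occurrences of the element 0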
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
llvm::MachineInstrBuilder
Definition: MachineInstrBuilder.h:69
llvm::PPCISD::LXSIZX
@ LXSIZX
GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an integer smaller than 64 bits into ...
Definition: PPCISelLowering.h:526
llvm::MVT::v2i64
@ v2i64
Definition: MachineValueType.h:118
llvm::SelectionDAG::MaxRecursionDepth
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:416
llvm::TLSModel::LocalDynamic
@ LocalDynamic
Definition: CodeGen.h:44
llvm::PPC::MOF_ScalarFloat
@ MOF_ScalarFloat
Definition: PPCISelLowering.h:698
llvm::MachineFrameInfo::getObjectSize
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
Definition: MachineFrameInfo.h:451
RuntimeLibcalls.h
llvm::ISD::FP_TO_UINT
@ FP_TO_UINT
Definition: ISDOpcodes.h:786
llvm::ConstantFPSDNode
Definition: SelectionDAGNodes.h:1587
llvm::Function::getCallingConv
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition: Function.h:239
llvm::TargetLoweringBase::isTypeLegal
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Definition: TargetLowering.h:894
llvm::MemSDNode::getMemOperand
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
Definition: SelectionDAGNodes.h:1332
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:572
llvm::TargetLowering::verifyReturnAddressArgumentIsConstant
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
Definition: TargetLowering.cpp:5977
llvm::InlineAsm::Kind_RegDef
@ Kind_RegDef
Definition: InlineAsm.h:234
llvm::PPCSubtarget::hasP9Altivec
bool hasP9Altivec() const
Definition: PPCSubtarget.h:275
llvm::ISD::LOAD
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:921
llvm::PPCISD::MFFS
@ MFFS
F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
Definition: PPCISelLowering.h:297
llvm::PPCISD::FIRST_NUMBER
@ FIRST_NUMBER
Definition: PPCISelLowering.h:48
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:80
llvm::TargetLowering::CallLoweringInfo::Chain
SDValue Chain
Definition: TargetLowering.h:3729
llvm::SelectionDAG::getIntPtrConstant
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
Definition: SelectionDAG.cpp:1471
llvm::PPC::isVMRGHShuffleMask
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VRGH* instruction with the ...
Definition: PPCISelLowering.cpp:1937
llvm::PPCTargetLowering::LowerAsmOperandForConstraint
void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
Definition: PPCISelLowering.cpp:15824
llvm::ISD::AssertZext
@ AssertZext
Definition: ISDOpcodes.h:62
llvm::TargetLoweringBase::Promote
@ Promote
Definition: TargetLowering.h:197
llvm::PPC::MOF_SExt
@ MOF_SExt
Definition: PPCISelLowering.h:679
llvm::TargetLoweringBase::NegatibleCost::Expensive
@ Expensive
llvm::ISD::TRAP
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1088
llvm::DataLayout::isLittleEndian
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:241
llvm::MachinePointerInfo
This class contains a discriminated union of information about pointers in memory operands,...
Definition: MachineMemOperand.h:38
llvm::TargetLowering::getJumpTableEncoding
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
Definition: TargetLowering.cpp:435
move
Residue of a flattened assembly listing from README.txt; the recoverable note suggests folding the shift into the conditional move.
Definition: README.txt:546
llvm::PPCSubtarget::is32BitELFABI
bool is32BitELFABI() const
Definition: PPCSubtarget.h:350
llvm::PPCSubtarget::needsSwapsForVSXMemOps
bool needsSwapsForVSXMemOps() const
Definition: PPCSubtarget.h:333
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
prepareDescriptorIndirectCall
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
Definition: PPCISelLowering.cpp:5323
llvm::InlineAsm::Kind_Imm
@ Kind_Imm
Definition: InlineAsm.h:237
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::TargetRegisterInfo::getMatchingSuperReg
MCRegister getMatchingSuperReg(MCRegister Reg, unsigned SubIdx, const TargetRegisterClass *RC) const
Return a super-register of the specified register Reg so its sub-register of index SubIdx is Reg.
Definition: TargetRegisterInfo.h:575
llvm::DenseMap
Definition: DenseMap.h:715
llvm::SelectionDAG::getCopyFromReg
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:761
MCSymbolXCOFF.h
llvm::TargetLowering::CallLoweringInfo::CallConv
CallingConv::ID CallConv
Definition: TargetLowering.h:3750
isAlternatingShuffMask
static bool isAlternatingShuffMask(const ArrayRef< int > &Mask, int NumElts)
Definition: PPCISelLowering.cpp:14395
llvm::ISD::OutputArg
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
Definition: TargetCallingConv.h:233
llvm::ISD::EXTRACT_VECTOR_ELT
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:511
llvm::TargetLoweringBase::setStackPointerRegisterToSaveRestore
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
Definition: TargetLowering.h:2134
PPCInstrInfo.h
llvm::GlobalValue::hasComdat
bool hasComdat() const
Definition: GlobalValue.h:222
llvm::SDNode::getOperand
const SDValue & getOperand(unsigned Num) const
Definition: SelectionDAGNodes.h:896
llvm::PPCSubtarget::hasSTFIWX
bool hasSTFIWX() const
Definition: PPCSubtarget.h:261
llvm::TargetLowering::CW_Register
@ CW_Register
Definition: TargetLowering.h:4157
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::X86AS::FS
@ FS
Definition: X86.h:188
llvm::MCPhysReg
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:20
llvm::ISD::LRINT
@ LRINT
Definition: ISDOpcodes.h:888
llvm::FrameIndexSDNode::getIndex
int getIndex() const
Definition: SelectionDAGNodes.h:1737
llvm::PPC::AddrMode
AddrMode
Definition: PPCISelLowering.h:710
llvm::SelectionDAG::getNode
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
Definition: SelectionDAG.cpp:7912
llvm::TargetLowering::DAGCombinerInfo::isBeforeLegalize
bool isBeforeLegalize() const
Definition: TargetLowering.h:3536
llvm::MachineFrameInfo::getObjectAlign
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
Definition: MachineFrameInfo.h:465
llvm::TargetLoweringBase::setPrefFunctionAlignment
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
Definition: TargetLowering.h:2294
llvm::ISD::FP_TO_FP16
@ FP_TO_FP16
Definition: ISDOpcodes.h:859
combineBVOfConsecutiveLoads
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
Definition: PPCISelLowering.cpp:13741
llvm::DenormalMode
Represent subnormal handling kind for floating point instruction inputs and outputs.
Definition: FloatingPointMode.h:67
llvm::PPCFrameLowering::getReturnSaveOffset
unsigned getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
Definition: PPCFrameLowering.h:149
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::PPCTargetLowering::getRegisterByName
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
Definition: PPCISelLowering.cpp:16008
llvm::ISD::ATOMIC_CMP_SWAP
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1125
MCRegisterInfo.h
llvm::TargetLowering::AsmOperandInfo
This contains information for each constraint that we are lowering.
Definition: TargetLowering.h:4164
llvm::ISD::UADDSAT
@ UADDSAT
Definition: ISDOpcodes.h:328
llvm::TargetLowering::CallLoweringInfo::DL
SDLoc DL
Definition: TargetLowering.h:3754
llvm::PPCTargetLowering::enableAggressiveFMAFusion
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always benefits from combining into FMA for a given value type.
Definition: PPCISelLowering.cpp:1742
llvm::ISD::SSUBSAT
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:336
llvm::ISD::FCOPYSIGN
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:476
llvm::LoadSDNode::getExtensionType
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Definition: SelectionDAGNodes.h:2278
llvm::AtomicRMWInst::Or
@ Or
*p = old | v
Definition: Instructions.h:750
llvm::PPCFunctionInfo::getVarArgsNumFPR
unsigned getVarArgsNumFPR() const
Definition: PPCMachineFunctionInfo.h:239
llvm::MachineFunction::CreateMachineBasicBlock
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
Definition: MachineFunction.cpp:414
llvm::SDNode::dump
void dump() const
Dump this node, for debugging.
Definition: SelectionDAGDumper.cpp:539
llvm::SelectionDAG::getSExtOrTrunc
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
Definition: SelectionDAG.cpp:1269
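A minimal sketch of typical use (the helper name and the target width are hypothetical, assuming the usual SelectionDAG headers are available):

    // Normalize an integer SDValue to i64: narrower values are sign-extended,
    // wider ones are truncated, and an i64 input is returned unchanged.
    static SDValue normalizeToI64(SelectionDAG &DAG, SDValue V, const SDLoc &dl) {
      return DAG.getSExtOrTrunc(V, dl, MVT::i64);
    }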
ArrayRef.h
llvm::TargetLoweringBase::NegatibleCost
NegatibleCost
Enum that specifies when a float negation is beneficial.
Definition: TargetLowering.h:267
llvm::PPCISD::SRA
@ SRA
Definition: PPCISelLowering.h:166
llvm::SelectionDAG::getAnyExtOrTrunc
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
Definition: SelectionDAG.cpp:1263
llvm::CCValAssign::getMem
static CCValAssign getMem(unsigned ValNo, MVT ValVT, unsigned Offset, MVT LocVT, LocInfo HTP)
Definition: CallingConvLower.h:102
DisableInnermostLoopAlign32
static cl::opt< bool > DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden)
llvm::PPCSubtarget::isPredictableSelectIsExpensive
bool isPredictableSelectIsExpensive() const
Definition: PPCSubtarget.h:414
llvm::Register::isVirtualRegister
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
llvm::GlobalAddressSDNode::getOffset
int64_t getOffset() const
Definition: SelectionDAGNodes.h:1713
llvm::MVT::v4f32
@ v4f32
Definition: MachineValueType.h:157
llvm::TargetLoweringBase::hasBigEndianPartOrdering
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
Definition: TargetLowering.h:1562
llvm::APInt::getBoolValue
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:483
llvm::PPCISD::Hi
@ Hi
Hi/Lo - These represent the high and low 16-bit parts of a global address respectively.
Definition: PPCISelLowering.h:135
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::MachineMemOperand::Flags
Flags
Flags values. These may be or'd together.
Definition: MachineMemOperand.h:131
llvm::PPC::PRED_NE
@ PRED_NE
Definition: PPCPredicates.h:32
llvm::MVT::getVectorNumElements
unsigned getVectorNumElements() const
Definition: MachineValueType.h:850
llvm::MachineFunction::getName
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Definition: MachineFunction.cpp:541
llvm::ISD::InputArg
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
Definition: TargetCallingConv.h:195
llvm::StoreSDNode
This class is used to represent ISD::STORE nodes.
Definition: SelectionDAGNodes.h:2291
llvm::MachineFrameInfo::setHasTailCall
void setHasTailCall(bool V=true)
Definition: MachineFrameInfo.h:605
llvm::PPCII::MO_PIC_FLAG
@ MO_PIC_FLAG
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase,...
Definition: PPC.h:105
llvm::ISD::ZEXTLOAD
@ ZEXTLOAD
Definition: ISDOpcodes.h:1335
llvm::SDValue::getValue
SDValue getValue(unsigned R) const
Definition: SelectionDAGNodes.h:172
llvm::MVT::i8
@ i8
Definition: MachineValueType.h:44
llvm::TargetMachine::Options
TargetOptions Options
Definition: TargetMachine.h:115
llvm::ISD::SETOGT
@ SETOGT
Definition: ISDOpcodes.h:1359
llvm::InlineAsm::Kind_Mem
@ Kind_Mem
Definition: InlineAsm.h:238
llvm::APFloatBase::PPCDoubleDouble
static const fltSemantics & PPCDoubleDouble() LLVM_READNONE
Definition: APFloat.cpp:185
llvm::PPCISD::PROBED_ALLOCA
@ PROBED_ALLOCA
To avoid stack clash, allocation is performed by block and each block is probed.
Definition: PPCISelLowering.h:153
llvm::PPCTargetLowering::getStackProbeSize
unsigned getStackProbeSize(MachineFunction &MF) const
Definition: PPCISelLowering.cpp:11668
llvm::TargetLowering::CallLoweringInfo
This structure contains all information that is necessary for lowering calls.
Definition: TargetLowering.h:3728
llvm::PPCFunctionInfo::setVarArgsStackOffset
void setVarArgsStackOffset(int Offset)
Definition: PPCMachineFunctionInfo.h:223
IRBuilder.h
llvm::PPCISD::EXTRACT_SPE
@ EXTRACT_SPE
Extract SPE register component, second argument is high or low.
Definition: PPCISelLowering.h:230
llvm::ISD::ArgFlagsTy::isNest
bool isNest() const
Definition: TargetCallingConv.h:118
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::CallingConv::Cold
@ Cold
Definition: CallingConv.h:48
llvm::SelectionDAG::getVectorShuffle
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
Definition: SelectionDAG.cpp:1777
llvm::PPCISD::EH_SJLJ_LONGJMP
@ EH_SJLJ_LONGJMP
Definition: PPCISelLowering.h:265
DisableSCO
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
llvm::ISD::ADJUST_TRAMPOLINE
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
Definition: ISDOpcodes.h:1085
llvm::ISD::MULHS
@ MULHS
Definition: ISDOpcodes.h:615
llvm::MachineFrameInfo::CreateFixedObject
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
Definition: MachineFrameInfo.cpp:83
llvm::FunctionLoweringInfo
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
Definition: FunctionLoweringInfo.h:53
llvm::MVT::Other
@ Other
Definition: MachineValueType.h:42
llvm::MVT::getSizeInBits
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
Definition: MachineValueType.h:860
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
llvm::LoopBase::getLoopDepth
unsigned getLoopDepth() const
Return the nesting level of this loop.
Definition: LoopInfo.h:96
llvm::MachineFunction::getFrameInfo
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Definition: MachineFunction.h:638
llvm::PPCTargetLowering::allowsMisalignedMemoryAccesses
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, bool *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
Definition: PPCISelLowering.cpp:16250
llvm::ISD::SETULT
@ SETULT
Definition: ISDOpcodes.h:1369
llvm::CCValAssign::getCustomReg
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
Definition: CallingConvLower.h:93
llvm::ConstantSDNode::getZExtValue
uint64_t getZExtValue() const
Definition: SelectionDAGNodes.h:1557
llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition: MachineBasicBlock.h:225
llvm::SmallSet::begin
const_iterator begin() const
Definition: SmallSet.h:223
llvm::MachineInstrBuilder::addMemOperand
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Definition: MachineInstrBuilder.h:202
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::SelectionDAG::getSelectCC
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
Definition: SelectionDAG.h:1087
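A hedged sketch of the helper in use; the clamping function itself is hypothetical, not taken from this file:

    // Clamp a negative i32 value to zero with a single ISD::SELECT_CC node:
    // result = (Val < 0) ? 0 : Val.
    static SDValue clampToZero(SelectionDAG &DAG, SDValue Val, const SDLoc &dl) {
      SDValue Zero = DAG.getConstant(0, dl, MVT::i32);
      return DAG.getSelectCC(dl, Val, Zero, /*True=*/Zero, /*False=*/Val, ISD::SETLT);
    }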
llvm::ISD::CondCode
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1355
llvm::PPCISD::BUILD_FP128
@ BUILD_FP128
Direct move of 2 consecutive GPR to a VSX register.
Definition: PPCISelLowering.h:221
llvm::ISD::EH_DWARF_CFA
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:129
Mode
SI Whole Quad Mode
Definition: SIWholeQuadMode.cpp:262
llvm::SelectionDAG::getMachineNode
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
Definition: SelectionDAG.cpp:8585
llvm::PPCTargetLowering::getSetCCResultType
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
Definition: PPCISelLowering.cpp:1734
MachineModuleInfo.h
llvm::PPCII::MO_TLS
@ MO_TLS
Definition: PPC.h:169
llvm::ISD::TargetGlobalTLSAddress
@ TargetGlobalTLSAddress
Definition: ISDOpcodes.h:165
llvm::SelectionDAG::getBitcast
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
Definition: SelectionDAG.cpp:2063
llvm::ISD::RETURNADDR
@ RETURNADDR
Definition: ISDOpcodes.h:95
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:31
llvm::ISD::isNON_EXTLoad
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
Definition: SelectionDAGNodes.h:2685
llvm::PPCISD::EXTRACT_VSX_REG
@ EXTRACT_VSX_REG
EXTRACT_VSX_REG = Extract one of the underlying vsx registers of an accumulator or pair register.
Definition: PPCISelLowering.h:477
combineBVOfVecSExt
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
Definition: PPCISelLowering.cpp:13870
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
isConsecutiveLSLoc
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
Definition: PPCISelLowering.cpp:12786
llvm::FastISel
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:65
llvm::MachineInstrBuilder::addUse
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
Definition: MachineInstrBuilder.h:123
llvm::RISCVISD::SRAW
@ SRAW
Definition: RISCVISelLowering.h:49
llvm::PPCTargetLowering::getExceptionPointerRegister
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
Definition: PPCISelLowering.cpp:16399
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::PPCISD::DYNAREAOFFSET
@ DYNAREAOFFSET
This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to compute an offset from native ...
Definition: PPCISelLowering.h:149
llvm::TargetLowering::CW_Memory
@ CW_Memory
Definition: TargetLowering.h:4158
R6
#define R6(n)
llvm::MVT::v256i1
@ v256i1
Definition: MachineValueType.h:72
llvm::StoreSDNode::getValue
const SDValue & getValue() const
Definition: SelectionDAGNodes.h:2312
llvm::SelectionDAG::CreateStackTemporary
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
Definition: SelectionDAG.cpp:2181
llvm::ISD::SRA_PARTS
@ SRA_PARTS
Definition: ISDOpcodes.h:716
llvm::ISD::VASTART
@ VASTART
Definition: ISDOpcodes.h:1042
llvm::PPCTargetLowering::isOffsetFoldingLegal
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
Definition: PPCISelLowering.cpp:16054
llvm::PPC::isVMRGEOShuffleMask
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
Definition: PPCISelLowering.cpp:2027
llvm::CallBase::arg_end
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
Definition: InstrTypes.h:1309
llvm::MachinePointerInfo::getWithOffset
MachinePointerInfo getWithOffset(int64_t O) const
Definition: MachineMemOperand.h:80
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
info
lazy value info
Definition: LazyValueInfo.cpp:59
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::SelectionDAG::getTokenFactor
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
Definition: SelectionDAG.cpp:10570
llvm::TargetLowering::CW_Default
@ CW_Default
Definition: TargetLowering.h:4160
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:70
llvm::MachineFunction
Definition: MachineFunction.h:230
llvm::ISD::ArgFlagsTy::setByValSize
void setByValSize(unsigned S)
Definition: TargetCallingConv.h:173
llvm::SelectionDAG::getCALLSEQ_END
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
Definition: SelectionDAG.h:935
llvm::PPC::DIR_PWR5
@ DIR_PWR5
Definition: PPCSubtarget.h:56
llvm::isAcquireOrStronger
bool isAcquireOrStronger(AtomicOrdering AO)
Definition: AtomicOrdering.h:128
llvm::BranchProbability::getOne
static BranchProbability getOne()
Definition: BranchProbability.h:50
llvm::MachineFunction::addLiveIn
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
Definition: MachineFunction.cpp:653
llvm::Type::FP128TyID
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition: Type.h:61
llvm::isIntS16Immediate
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
Definition: PPCISelLowering.cpp:2520
llvm::CodeGenOpt::None
@ None
Definition: CodeGen.h:53
llvm::PPCSubtarget::hasDirectMove
bool hasDirectMove() const
Definition: PPCSubtarget.h:307
llvm::TargetLowering::CallLoweringInfo::Ins
SmallVector< ISD::InputArg, 32 > Ins
Definition: TargetLowering.h:3758
llvm::ISD::ConstantPool
@ ConstantPool
Definition: ISDOpcodes.h:82
llvm::RetCC_PPC
bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
TargetOptions.h
llvm::ISD::GlobalTLSAddress
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
llvm::CCState::isVarArg
bool isVarArg() const
Definition: CallingConvLower.h:260
llvm::CCValAssign::getValNo
unsigned getValNo() const
Definition: CallingConvLower.h:142
llvm::MemSDNode::getAAInfo
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
Definition: SelectionDAGNodes.h:1296
llvm::AArch64::RM
@ RM
Definition: AArch64ISelLowering.h:472
llvm::BlockAddress
The address of a basic block.
Definition: Constants.h:848
llvm::ISD::TargetConstantPool
@ TargetConstantPool
Definition: ISDOpcodes.h:168
llvm::MachineInstrBuilder::addRegMask
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
Definition: MachineInstrBuilder.h:197
llvm::TargetLowering::CallLoweringInfo::DAG
SelectionDAG & DAG
Definition: TargetLowering.h:3753
llvm::BuildVectorSDNode::isConstant
bool isConstant() const
Definition: SelectionDAG.cpp:10488
llvm::SelectionDAG::getTargetConstantPool
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=None, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:699
llvm::PPCSubtarget::hasFRE
bool hasFRE() const
Definition: PPCSubtarget.h:256
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:99
llvm::MVT::fixedlen_vector_valuetypes
static auto fixedlen_vector_valuetypes()
Definition: MachineValueType.h:1425
llvm::PPCISD::SCALAR_TO_VECTOR_PERMUTED
@ SCALAR_TO_VECTOR_PERMUTED
PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to place the value into the least sign...
Definition: PPCISelLowering.h:248
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:43
llvm::PPCISD::STRICT_FCFIDU
@ STRICT_FCFIDU
Definition: PPCISelLowering.h:490
llvm::EVT::isVector
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:155
llvm::PPCTargetLowering::emitEHSjLjLongJmp
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
Definition: PPCISelLowering.cpp:11559
llvm::ISD::isNormalLoad
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
Definition: SelectionDAGNodes.h:2678
llvm::ISD::UMAX
@ UMAX
Definition: ISDOpcodes.h:629
llvm::ISD::PRE_INC
@ PRE_INC
Definition: ISDOpcodes.h:1304
llvm::ConstantPoolSDNode
Definition: SelectionDAGNodes.h:1824
None.h
llvm::SelectionDAG::getSelect
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
Definition: SelectionDAG.h:1075
llvm::APInt::getAllOnesValue
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition: APInt.h:567
llvm::BlockAddressSDNode::getBlockAddress
const BlockAddress * getBlockAddress() const
Definition: SelectionDAGNodes.h:2127
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::CCState::AllocateReg
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
Definition: CallingConvLower.h:351
llvm::ISD::STRICT_FTRUNC
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:416
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1541
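A small illustrative use (the mask values are made up):

    #include "llvm/ADT/STLExtras.h"

    int Mask[] = {0, 2, -1, 3};
    // True if any shuffle-mask element is the undef sentinel (a negative value).
    bool HasUndef = llvm::any_of(Mask, [](int M) { return M < 0; });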
llvm::BuildVectorSDNode::isConstantSplat
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
Definition: SelectionDAG.cpp:10288
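A sketch of the typical query pattern; the wrapper function is hypothetical, and backends usually derive the isBigEndian argument from the subtarget, as assumed here:

    // Returns true if BV is a constant splat of a single byte, and reports that byte.
    static bool isByteSplat(BuildVectorSDNode *BV, bool IsLittleEndian, uint8_t &Byte) {
      APInt SplatValue, SplatUndef;
      unsigned SplatBitSize;
      bool HasAnyUndefs;
      if (!BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                               /*MinSplatBits=*/8, /*isBigEndian=*/!IsLittleEndian) ||
          SplatBitSize != 8)
        return false;
      Byte = SplatValue.getZExtValue();
      return true;
    }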
llvm::Reloc::PIC_
@ PIC_
Definition: CodeGen.h:22
llvm::SDNode::use_end
static use_iterator use_end()
Definition: SelectionDAGNodes.h:779
DataLayout.h
llvm::MVT::i64
@ i64
Definition: MachineValueType.h:47
llvm::MachineFrameInfo::CreateStackObject
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Definition: MachineFrameInfo.cpp:51
llvm::StructType
Class to represent struct types.
Definition: DerivedTypes.h:212
llvm::countTrailingZeros
unsigned countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count the number of 0's from the least significant bit upward, stopping at the first 1.
Definition: MathExtras.h:156
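A worked example of the default ZB_Width behavior:

    #include "llvm/Support/MathExtras.h"

    unsigned TZ  = llvm::countTrailingZeros(8u); // 0b1000 -> 3
    unsigned All = llvm::countTrailingZeros(0u); // no set bit -> bit width of the type, 32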
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:167
llvm::MVT::v2i32
@ v2i32
Definition: MachineValueType.h:101
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
isLoad
static bool isLoad(int Opcode)
Definition: ARCInstrInfo.cpp:53
llvm::TargetLowering::LowerToTLSEmulatedModel
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
Definition: TargetLowering.cpp:7879
llvm::TargetLowering::LowerAsmOperandForConstraint
virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Definition: TargetLowering.cpp:4509
llvm::EVT::getScalarSizeInBits
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:353
llvm::MachineBasicBlock::splice
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
Definition: MachineBasicBlock.h:950
llvm::BuildVectorSDNode
A "pseudo-class" with methods for operating on BUILD_VECTORs.
Definition: SelectionDAGNodes.h:1929
llvm::SDNodeFlags::setNoFPExcept
void setNoFPExcept(bool b)
Definition: SelectionDAGNodes.h:421
llvm::APFloat::isPosZero
bool isPosZero() const
Definition: APFloat.h:1228
llvm::MachineRegisterInfo::hasOneNonDBGUse
bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
Definition: MachineRegisterInfo.cpp:419
llvm::PPCISD::VCMP
@ VCMP
RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* instructions.
Definition: PPCISelLowering.h:271
llvm::ShuffleVectorSDNode::getMaskElt
int getMaskElt(unsigned Idx) const
Definition: SelectionDAGNodes.h:1501
llvm::isReleaseOrStronger
bool isReleaseOrStronger(AtomicOrdering AO)
Definition: AtomicOrdering.h:132
llvm::Sched::Hybrid
@ Hybrid
Definition: TargetLowering.h:101
llvm::ISD::STRICT_SINT_TO_FP
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:435
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
llvm::ISD::SREM
@ SREM
Definition: ISDOpcodes.h:244
llvm::PPCISD::STRICT_FADDRTZ
@ STRICT_FADDRTZ
Constrained floating point add in round-to-zero mode.
Definition: PPCISelLowering.h:495
llvm::TargetLowering::DAGCombinerInfo::isAfterLegalizeDAG
bool isAfterLegalizeDAG() const
Definition: TargetLowering.h:3538
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::ISD::LLRINT
@ LLRINT
Definition: ISDOpcodes.h:889
llvm::PPCISD::STRICT_FCFIDS
@ STRICT_FCFIDS
Definition: PPCISelLowering.h:491
llvm::ISD::UMUL_LOHI
@ UMUL_LOHI
Definition: ISDOpcodes.h:251
llvm::PPCSubtarget::hasFPU
bool hasFPU() const
Definition: PPCSubtarget.h:268
llvm::SelectionDAG::ReplaceAllUsesOfValueWith
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
Definition: SelectionDAG.cpp:9173
llvm::ISD::OutputArg::Flags
ArgFlagsTy Flags
Definition: TargetCallingConv.h:234
llvm::MVT::v2f32
@ v2f32
Definition: MachineValueType.h:155
llvm::PPCTargetLowering::SelectAddressEVXRegReg
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified address, check to see if it can be more efficiently re...
Definition: PPCISelLowering.cpp:2551
llvm::TargetLoweringBase::AddrMode::BaseGV
GlobalValue * BaseGV
Definition: TargetLowering.h:2350
PPCPerfectShuffle.h
llvm::TargetRegisterInfo::isTypeLegalForClass
bool isTypeLegalForClass(const TargetRegisterClass &RC, MVT T) const
Return true if the given TargetRegisterClass has the ValueType T.
Definition: TargetRegisterInfo.h:291
llvm::ISD::STRICT_FSUB
@ STRICT_FSUB
Definition: ISDOpcodes.h:388
uint32_t
llvm::StackOffset
StackOffset is a class to represent an offset with 2 dimensions, named fixed and scalable,...
Definition: TypeSize.h:134
Compiler.h
llvm::TargetLoweringBase::IsStrictFPEnabled
bool IsStrictFPEnabled
Definition: TargetLowering.h:3162
llvm::ISD::ArgFlagsTy
Definition: TargetCallingConv.h:27
llvm::TargetLoweringBase::MaxStoresPerMemmoveOptSize
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
Definition: TargetLowering.h:3143
llvm::SDValue::getOperand
const SDValue & getOperand(unsigned i) const
Definition: SelectionDAGNodes.h:1121
llvm::IRBuilderBase
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:95
llvm::PPCTargetLowering::SelectAddressRegImm34
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
Similar to the 16-bit case but for instructions that take a 34-bit displacement field (prefixed loads...
Definition: PPCISelLowering.cpp:2783
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:81
llvm::PPCTargetLowering::CallFlags::IsTailCall
const bool IsTailCall
Definition: PPCISelLowering.h:1123
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::TargetLowering::getCheaperNegatedExpression
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
Definition: TargetLowering.h:3668
llvm::PPCSubtarget::has64BitSupport
bool has64BitSupport() const
has64BitSupport - Return true if the selected CPU supports 64-bit instructions, regardless of whether...
Definition: PPCSubtarget.h:233
llvm::TargetLowering::isGAPlusOffset
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
Definition: TargetLowering.cpp:4407
llvm::PPCISD::FCFIDUS
@ FCFIDUS
Definition: PPCISelLowering.h:67
combineADDToADDZE
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
Definition: PPCISelLowering.cpp:16642
S
Residue of a flattened assembly listing from README.txt; the recoverable note asks whether three moves would be better than the emitted sequence.
Definition: README.txt:210
llvm::ConstantSDNode::getSExtValue
int64_t getSExtValue() const
Definition: SelectionDAGNodes.h:1558
llvm::SDNodeFlags::hasNoInfs
bool hasNoInfs() const
Definition: SelectionDAGNodes.h:428
llvm::ISD::FEXP
@ FEXP
Definition: ISDOpcodes.h:877
llvm::SDValue::hasOneUse
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
Definition: SelectionDAGNodes.h:1157
llvm::PICLevel::Level
Level
Definition: CodeGen.h:33
llvm::ISD::SMUL_LOHI
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:250
llvm::TargetLowering::getRegForInlineAsmConstraint
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
Definition: TargetLowering.cpp:4588
isNByteElemShuffleMask
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
Definition: PPCISelLowering.cpp:2141
llvm::TargetMachine::shouldAssumeDSOLocal
bool shouldAssumeDSOLocal(const Module &M, const GlobalValue *GV) const
Definition: TargetMachine.cpp:94
hasSameArgumentList
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB)
Definition: PPCISelLowering.cpp:4734
llvm::TargetLoweringBase::setCondCodeAction
void setCondCodeAction(ISD::CondCode CC, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
Definition: TargetLowering.h:2251
llvm::TargetLoweringBase::setTruncStoreAction
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
Definition: TargetLowering.h:2205
llvm::SDValue::getSimpleValueType
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
Definition: SelectionDAGNodes.h:183
llvm::MVT::v4i32
@ v4i32
Definition: MachineValueType.h:103
llvm::SDNode::ops
ArrayRef< SDUse > ops() const
Definition: SelectionDAGNodes.h:905
llvm::PPCFunctionInfo::getVarArgsStackOffset
int getVarArgsStackOffset() const
Definition: PPCMachineFunctionInfo.h:222
llvm::ISD::FEXP2
@ FEXP2
Definition: ISDOpcodes.h:878
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:421
llvm::SmallSet::insert
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:180
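A brief illustrative use; the .second member of the returned pair reports whether the value was newly inserted:

    #include "llvm/ADT/SmallSet.h"

    llvm::SmallSet<unsigned, 4> SeenRegs;
    bool Inserted = SeenRegs.insert(42).second; // true, first time
    bool Again    = SeenRegs.insert(42).second; // false, already present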
llvm::ISD::STRICT_FP_EXTEND
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:456
llvm::TargetLoweringBase::setMinStackArgumentAlignment
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
Definition: TargetLowering.h:2304
isScalarToVec
static SDValue isScalarToVec(SDValue Op)
Definition: PPCISelLowering.cpp:14428
LLVM_FALLTHROUGH
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:281
llvm::SDVTList
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Definition: SelectionDAGNodes.h:79
llvm::PPCISD::PPC32_PICGOT
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
Definition: PPCISelLowering.h:316
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:175
llvm::ISD::FMUL
@ FMUL
Definition: ISDOpcodes.h:379
llvm::PPCISD::LXVRZX
@ LXVRZX
LXVRZX - Load VSX Vector Rightmost and Zero Extend. This node represents v1i128 BUILD_VECTOR of a zero...
Definition: PPCISelLowering.h:542
setUsesTOCBasePtr
static void setUsesTOCBasePtr(MachineFunction &MF)
Definition: PPCISelLowering.cpp:3049
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
llvm::MachineMemOperand::MOVolatile
@ MOVolatile
The memory access is volatile.
Definition: MachineMemOperand.h:139
llvm::MVT::v512i1
@ v512i1
Definition: MachineValueType.h:73
llvm::isIndirectCall
static bool isIndirectCall(const MachineInstr &MI)
Definition: ARMBaseInstrInfo.h:654
llvm::MVT::v1i128
@ v1i128
Definition: MachineValueType.h:128
generateEquivalentSub
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
Definition: PPCISelLowering.cpp:12970
llvm::SignExtend64
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:777
llvm::ISD::SEXTLOAD
@ SEXTLOAD
Definition: ISDOpcodes.h:1335
llvm::ISD::InputArg::Flags
ArgFlagsTy Flags
Definition: TargetCallingConv.h:196
llvm::PPCISD::LD_VSX_LH
@ LD_VSX_LH
VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a v2f32 value into the lower ha...
Definition: PPCISelLowering.h:551
llvm::GlobalValue::isStrongDefinitionForLinker
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition: GlobalValue.h:547
llvm::PPC::getSplatIdxForPPCMnemonics
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getSplatIdxForPPCMnemonics - Return the splat index as a value that is appropriate for PPC mnemonics ...
Definition: PPCISelLowering.cpp:2398
llvm::PPCTargetLowering::isJumpTableRelative
bool isJumpTableRelative() const override
Definition: PPCISelLowering.cpp:3121
LowerMemOpCallTo
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls.
Definition: PPCISelLowering.cpp:5024
llvm::SelectionDAG::getBuildVector
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:790
llvm::PPCISD::PADDI_DTPREL
@ PADDI_DTPREL
G8RC = PADDI_DTPREL x3, Symbol - For the pc-rel based local-dynamic TLS model, produces a PADDI8 inst...
Definition: PPCISelLowering.h:400
llvm::SelectionDAG::getConstantFP
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
Definition: SelectionDAG.cpp:1527
llvm::APInt::clearBit
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition: APInt.h:1525
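A small sketch with an assumed 8-bit value; bit positions are zero-based.
    #include "llvm/ADT/APInt.h"
    llvm::APInt V(/*numBits=*/8, /*val=*/0b1010);
    V.clearBit(1);   // V is now 0b1000
    V.clearBit(7);   // bit was already 0; no effect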
llvm::AtomicRMWInst
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:726
llvm::MachineMemOperand::MOLoad
@ MOLoad
The memory access reads data.
Definition: MachineMemOperand.h:135
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::PPCTargetLowering::EmitAtomicBinary
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
Definition: PPCISelLowering.cpp:11029
llvm::TargetLowering::C_RegisterClass
@ C_RegisterClass
Definition: TargetLowering.h:4140
llvm::PPC::AM_XForm
@ AM_XForm
Definition: PPCISelLowering.h:715
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::RetCC_PPC_Cold
bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::ISD::INTRINSIC_WO_CHAIN
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
llvm::ISD::XOR
@ XOR
Definition: ISDOpcodes.h:634
llvm::TargetLoweringBase::ArgListTy
std::vector< ArgListEntry > ArgListTy
Definition: TargetLowering.h:303
llvm::Function::hasOptSize
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:717
llvm::PPC::DIR_E500mc
@ DIR_E500mc
Definition: PPCSubtarget.h:52
llvm::PPCTargetLowering::getScratchRegisters
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
Definition: PPCISelLowering.cpp:16387
llvm::PPCISD::LXVD2X
@ LXVD2X
VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
Definition: PPCISelLowering.h:536
llvm::PPCSubtarget::hasInvariantFunctionDescriptors
bool hasInvariantFunctionDescriptors() const
Definition: PPCSubtarget.h:300
llvm::LoadSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:2282
llvm::PPCISD::ADDIS_TLSGD_HA
@ ADDIS_TLSGD_HA
G8RC = ADDIS_TLSGD_HA x2, Symbol - For the general-dynamic TLS model, produces an ADDIS8 instruction ...
Definition: PPCISelLowering.h:340
llvm::SelectionDAG::getTargetJumpTable
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:693
llvm::PPCISD::ADDI_DTPREL_L
@ ADDI_DTPREL_L
G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction ...
Definition: PPCISelLowering.h:396
llvm::APInt::zext
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:934
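A minimal sketch: zext widens the value and fills the new high bits with zero, so the numeric value is preserved.
    #include "llvm/ADT/APInt.h"
    llvm::APInt Narrow(/*numBits=*/8, /*val=*/200);
    llvm::APInt Wide = Narrow.zext(32);  // still 200, now 32 bits wide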
llvm::ISD::FRAMEADDR
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:94
Callee
amdgpu Simplify well known AMD library false FunctionCallee Callee
Definition: AMDGPULibCalls.cpp:206
llvm::ISD::FrameIndex
@ FrameIndex
Definition: ISDOpcodes.h:80
llvm::PPC::isVPKUDUMShuffleMask
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
Definition: PPCISelLowering.cpp:1844
llvm::ISD::FSQRT
@ FSQRT
Definition: ISDOpcodes.h:868
llvm::EVT::getEVTString
std::string getEVTString() const
This function returns value type as a string, e.g. "i32".
Definition: ValueTypes.cpp:151
llvm::APInt::bitsToDouble
double bitsToDouble() const
Converts APInt bits to a double.
Definition: APInt.h:1777
llvm::PPCISD::FCTIWUZ
@ FCTIWUZ
Definition: PPCISelLowering.h:78
llvm::PPCTargetLowering::isFPExtFree
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
Definition: PPCISelLowering.cpp:16233
CallingConv.h
llvm::PPCISD::ADDI_TLSLD_L_ADDR
@ ADDI_TLSLD_L_ADDR
G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSLD_L and GET_TLSLD_ADDR un...
Definition: PPCISelLowering.h:386
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::HexagonISD::CP
@ CP
Definition: HexagonISelLowering.h:53
llvm::TargetLoweringBase::getTargetMachine
const TargetMachine & getTargetMachine() const
Definition: TargetLowering.h:338
getLabelAccessInfo
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Determine whether labels should be referenced using a PICBase; sets HiOpFlags and LoOpFlags to the tar...
Definition: PPCISelLowering.cpp:3017
llvm::isInt< 16 >
constexpr bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:370
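Illustration of the predicate on the signed 16-bit range [-32768, 32767]; the check is constexpr.
    #include "llvm/Support/MathExtras.h"
    static_assert(llvm::isInt<16>(32767), "fits in a signed halfword");
    static_assert(!llvm::isInt<16>(32768), "one past the representable range");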
llvm::TargetLowering::CallLoweringInfo::IsTailCall
bool IsTailCall
Definition: TargetLowering.h:3744
llvm::SDNode::isOnlyUserOf
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
Definition: SelectionDAG.cpp:9791
j
return j(j<< 16)
llvm::EVT::getHalfNumVectorElementsVT
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:415
llvm::PPC::DIR_970
@ DIR_970
Definition: PPCSubtarget.h:49
llvm::ISD::SETLT
@ SETLT
Definition: ISDOpcodes.h:1378
llvm::PPCISD::FSEL
@ FSEL
FSEL - Traditional three-operand fsel node.
Definition: PPCISelLowering.h:52
llvm::DataLayout::getIntPtrType
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition: DataLayout.cpp:838
llvm::PPCISD::RET_FLAG
@ RET_FLAG
Return with a flag operand, matched by 'blr'.
Definition: PPCISelLowering.h:204
llvm::isAllOnesConstant
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
Definition: SelectionDAG.cpp:9537
llvm::PPCISD::TOC_ENTRY
@ TOC_ENTRY
GPRC = TOC_ENTRY GA, TOC Loads the entry for GA from the TOC, where the TOC base is given by the last...
Definition: PPCISelLowering.h:579
Constant.h
llvm::NVPTX::PTXLdStInstCode::V2
@ V2
Definition: NVPTX.h:123
llvm::TargetLoweringBase::AddrMode::BaseOffs
int64_t BaseOffs
Definition: TargetLowering.h:2351
llvm::CCState::getFirstUnallocated
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
Definition: CallingConvLower.h:336
llvm::CCValAssign::isMemLoc
bool isMemLoc() const
Definition: CallingConvLower.h:146
llvm::PPCTargetLowering::CallFlags::IsVarArg
const bool IsVarArg
Definition: PPCISelLowering.h:1124
setXFormForUnalignedFI
static void setXFormForUnalignedFI(SDValue N, unsigned Flags, PPC::AddrMode &Mode)
Definition: PPCISelLowering.cpp:17370
llvm::PPC::isXXBRQShuffleMask
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
Definition: PPCISelLowering.cpp:2330
llvm::TargetLoweringBase::IntrinsicInfo
Definition: TargetLowering.h:987
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::ISD::STRICT_FMUL
@ STRICT_FMUL
Definition: ISDOpcodes.h:389
llvm::commonAlignment
Align commonAlignment(Align A, Align B)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:211
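Sketch: for two power-of-two alignments, the alignment satisfying both is the smaller one.
    #include "llvm/Support/Alignment.h"
    llvm::Align A(16), B(4);
    llvm::Align C = llvm::commonAlignment(A, B);  // Align(4)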
PPCCallingConv.h
llvm::ISD::STRICT_FMA
@ STRICT_FMA
Definition: ISDOpcodes.h:392
llvm::ISD::ArgFlagsTy::getByValSize
unsigned getByValSize() const
Definition: TargetCallingConv.h:169
llvm::ISD::FMAXNUM
@ FMAXNUM
Definition: ISDOpcodes.h:899
llvm::Type::getInt64Ty
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:204
llvm::PPCFunctionInfo::FixedType
@ FixedType
Definition: PPCMachineFunctionInfo.h:27
getMaxByValAlign
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
Definition: PPCISelLowering.cpp:1524
llvm::PPCISD::STRICT_FCTIWUZ
@ STRICT_FCTIWUZ
Definition: PPCISelLowering.h:486
llvm::PPC::createFastISel
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
Definition: PPCFastISel.cpp:2468
llvm::PPCTargetLowering::SelectAddressRegImm
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
Definition: PPCISelLowering.cpp:2678
llvm::SelectionDAG::computeKnownBits
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
Definition: SelectionDAG.cpp:2732
llvm::TargetLoweringBase::setLoadExtAction
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
Definition: TargetLowering.h:2193
llvm::GlobalAddressSDNode
Definition: SelectionDAGNodes.h:1700
llvm::KnownBits
Definition: KnownBits.h:23
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:588
EnableQuadwordAtomics
static cl::opt< bool > EnableQuadwordAtomics("ppc-quadword-atomics", cl::desc("enable quadword lock-free atomic operations"), cl::init(false), cl::Hidden)
llvm::ISD::EXTRACT_SUBVECTOR
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:549
llvm::SDNode::getNumOperands
unsigned getNumOperands() const
Return the number of values used by this operation.
Definition: SelectionDAGNodes.h:883
llvm::TargetLoweringBase::setIndexedLoadAction
void setIndexedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
Definition: TargetLowering.h:2216
llvm::AIXCCState::isFixed
bool isFixed(unsigned ValNo) const
Definition: PPCCCState.h:68
llvm::TargetLoweringBase::AtomicExpansionKind
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
Definition: TargetLowering.h:249
llvm::ISD::isEXTLoad
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
Definition: SelectionDAGNodes.h:2691
llvm::PPCSubtarget::useSoftFloat
bool useSoftFloat() const
Definition: PPCSubtarget.h:235
uint16_t
llvm::TargetLoweringBase::AddrMode::Scale
int64_t Scale
Definition: TargetLowering.h:2353
llvm::PPCISD::STBRX
@ STBRX
CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a byte-swapping store instruction.
Definition: PPCISelLowering.h:501
CallingConvLower.h
rotate
The same transformation can work with an even modulo with the addition of a rotate
Definition: README.txt:680
llvm::isNullConstant
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
Definition: SelectionDAG.cpp:9527
llvm::PPCISD::XSMAXCDP
@ XSMAXCDP
XSMAXCDP, XSMINCDP - C-type min/max instructions.
Definition: PPCISelLowering.h:55
llvm::EVT::getScalarType
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:296
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:314
llvm::PPCTargetLowering::useLoadStackGuardNode
bool useLoadStackGuardNode() const override
Override to support customized stack guard loading.
Definition: PPCISelLowering.cpp:16514
llvm::ISD::BR
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:937
llvm::ISD::TargetExternalSymbol
@ TargetExternalSymbol
Definition: ISDOpcodes.h:169
getPPCStrictOpcode
static unsigned getPPCStrictOpcode(unsigned Opc)
Definition: PPCISelLowering.cpp:7962
llvm::ilist_iterator
Iterator for intrusive lists based on ilist_node.
Definition: ilist_iterator.h:57
MachineFrameInfo.h
llvm::Align::value
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
llvm::Function::getFnAttribute
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:366
llvm::object::BCTR
@ BCTR
Definition: ELF.h:95
llvm::MemSDNode::getAlignment
unsigned getAlignment() const
Definition: SelectionDAGNodes.h:1266
llvm::ISD::FCOS
@ FCOS
Definition: ISDOpcodes.h:871
llvm::SelectionDAG::getEntryNode
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:516
llvm::MachineMemOperand::getSize
uint64_t getSize() const
Return the size in bytes of the memory reference.
Definition: MachineMemOperand.h:236
llvm::ISD::FCEIL
@ FCEIL
Definition: ISDOpcodes.h:879
llvm::CallBase::arg_size
unsigned arg_size() const
Definition: InstrTypes.h:1326
llvm::TargetLowering::ConstraintWeight
ConstraintWeight
Definition: TargetLowering.h:4147
llvm::PPCISD::FCTIDUZ
@ FCTIDUZ
Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for unsigned integers with round ...
Definition: PPCISelLowering.h:77
getSToVPermuted
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
Definition: PPCISelLowering.cpp:14465
llvm::ISD::FSIN
@ FSIN
Definition: ISDOpcodes.h:870
llvm::SelectionDAG::getDataLayout
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:440
llvm::MVT::v8i16
@ v8i16
Definition: MachineValueType.h:92
llvm::PPCISD::ADDIS_DTPREL_HA
@ ADDIS_DTPREL_HA
G8RC = ADDIS_DTPREL_HA x3, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction t...
Definition: PPCISelLowering.h:391
ISDOpcodes.h
Success
#define Success
Definition: AArch64Disassembler.cpp:261
isBLACompatibleAddress
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isBLACompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
Definition: PPCISelLowering.cpp:4906
llvm::AtomicRMWInst::And
@ And
*p = old & v
Definition: Instructions.h:746
Enabled
static bool Enabled
Definition: Statistic.cpp:46
llvm::PPCISD::ANDI_rec_1_EQ_BIT
@ ANDI_rec_1_EQ_BIT
i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after ex...
Definition: PPCISelLowering.h:254
llvm::AIXCCState
Definition: PPCCCState.h:41
llvm::AArch64CC::LS
@ LS
Definition: AArch64BaseInfo.h:264
llvm::ISD::INLINEASM_BR
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
Definition: ISDOpcodes.h:983
llvm::ISD::BUILD_VECTOR
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:491
Casting.h
llvm::InlineAsm::Kind_RegDefEarlyClobber
@ Kind_RegDefEarlyClobber
Definition: InlineAsm.h:235
llvm::PPCFunctionInfo::getVarArgsNumGPR
unsigned getVarArgsNumGPR() const
Definition: PPCMachineFunctionInfo.h:225
llvm::ISD::STRICT_FCEIL
@ STRICT_FCEIL
Definition: ISDOpcodes.h:412
llvm::ISD::ArgFlagsTy::isSExt
bool isSExt() const
Definition: TargetCallingConv.h:76
llvm::PPCTargetLowering::BuildSDIVPow2
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
Definition: PPCISelLowering.cpp:15477
Function.h
llvm::CCState::AllocateStack
unsigned AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
Definition: CallingConvLower.h:425
llvm::TargetLoweringBase::Custom
@ Custom
Definition: TargetLowering.h:200
llvm::PPCTargetLowering::isLegalAddImmediate
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Definition: PPCISelLowering.cpp:16246
llvm::ISD::SUBC
@ SUBC
Definition: ISDOpcodes.h:270
llvm::BitWidth
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
llvm::TargetLowering::DAGCombinerInfo::AddToWorklist
void AddToWorklist(SDNode *N)
Definition: DAGCombiner.cpp:832
llvm::PPC::MOF_SubWordInt
@ MOF_SubWordInt
Definition: PPCISelLowering.h:695
llvm::SelectionDAG::getTargetExternalSymbol
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.cpp:1728
llvm::SelectionDAG::getMCSymbol
SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
Definition: SelectionDAG.cpp:1719
llvm::PPCISD::SWAP_NO_CHAIN
@ SWAP_NO_CHAIN
An SDNode for swaps that are not associated with any loads/stores and thereby have no chain.
Definition: PPCISelLowering.h:431
llvm::MVT::i32
@ i32
Definition: MachineValueType.h:46
llvm::CCState::getNextStackOffset
unsigned getNextStackOffset() const
getNextStackOffset - Return the next stack offset such that all stack slots satisfy their alignment r...
Definition: CallingConvLower.h:264
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
llvm::AtomicCmpXchgInst::getPointerOperand
Value * getPointerOperand()
Definition: Instructions.h:651
llvm::ISD::SETUO
@ SETUO
Definition: ISDOpcodes.h:1365
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:219
llvm::PPCISD::NodeType
NodeType
Definition: PPCISelLowering.h:46
llvm::ARCCC::Z
@ Z
Definition: ARCInfo.h:41
llvm::ISD::SDIV
@ SDIV
Definition: ISDOpcodes.h:242
llvm::SDValue
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
Definition: SelectionDAGNodes.h:138
usePartialVectorLoads
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd),...
Definition: PPCISelLowering.cpp:2885
llvm::PPC::isVPKUWUMShuffleMask
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
Definition: PPCISelLowering.cpp:1807
llvm::PPCFunctionInfo::VectorFloat
@ VectorFloat
Definition: PPCMachineFunctionInfo.h:33
llvm::SDNode::getNumValues
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
Definition: SelectionDAGNodes.h:955
llvm::PPCSubtarget::hasSPE
bool hasSPE() const
Definition: PPCSubtarget.h:266
llvm::TargetLoweringBase::shouldExpandAtomicCmpXchgInIR
virtual AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Definition: TargetLowering.h:2001
llvm::TargetMachine::getCodeModel
CodeModel::Model getCodeModel() const
Returns the code model.
Definition: TargetMachine.cpp:74
llvm::TLSModel::Model
Model
Definition: CodeGen.h:42
llvm::MCSymbolRefExpr::create
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:384
llvm::TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic
@ MaskedIntrinsic
llvm::PPCISD::CALL
@ CALL
CALL - A direct function call.
Definition: PPCISelLowering.h:186
llvm::TargetLoweringBase::ZeroOrOneBooleanContent
@ ZeroOrOneBooleanContent
Definition: TargetLowering.h:232
StringSwitch.h
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
llvm::SDNodeFlags
These are IR-level optimization flags that may be propagated to SDNodes.
Definition: SelectionDAGNodes.h:371
llvm::PPCISD::CR6UNSET
@ CR6UNSET
Definition: PPCISelLowering.h:308
llvm::PPCTargetLowering::insertSSPDeclarations
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
Definition: PPCISelLowering.cpp:16522
llvm::TargetLoweringBase::getSchedulingPreference
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
Definition: TargetLowering.h:838
llvm::PPCISD::BCTRL_LOAD_TOC
@ BCTRL_LOAD_TOC
CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl instruction and the TOC reload r...
Definition: PPCISelLowering.h:201
llvm::TargetLowering::CW_Invalid
@ CW_Invalid
Definition: TargetLowering.h:4149
transformCallee
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const SDLoc &dl, const PPCSubtarget &Subtarget)
Definition: PPCISelLowering.cpp:5218
llvm::PPC::MOF_RPlusSImm16Mult4
@ MOF_RPlusSImm16Mult4
Definition: PPCISelLowering.h:687
combineADDToMAT_PCREL_ADDR
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
Definition: PPCISelLowering.cpp:16728
llvm::TargetLoweringBase::setJumpIsExpensive
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
Definition: TargetLoweringBase.cpp:949
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:183
llvm::PPCII::MO_HA
@ MO_HA
Definition: PPC.h:157
llvm::InlineAsm::Kind_RegUse
@ Kind_RegUse
Definition: InlineAsm.h:233
llvm::PPCISD::LOAD_VEC_BE
@ LOAD_VEC_BE
VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
Definition: PPCISelLowering.h:547
PPCISelLowering.h
llvm::PPCTargetMachine
Common code between 32-bit and 64-bit PowerPC targets.
Definition: PPCTargetMachine.h:25
llvm::SmallSet::end
const_iterator end() const
Definition: SmallSet.h:229
llvm::TargetLoweringBase::setLibcallName
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
Definition: TargetLowering.h:2835
llvm::ISD::STORE
@ STORE
Definition: ISDOpcodes.h:922
llvm::ISD::VACOPY
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1037
llvm::CallBase::getCalledOperand
Value * getCalledOperand() const
Definition: InstrTypes.h:1386
llvm::TargetRegisterInfo::getNoPreservedMask
virtual const uint32_t * getNoPreservedMask() const
Return a register mask that clobbers everything.
Definition: TargetRegisterInfo.h:499
llvm::TargetLowering::CallLoweringInfo::NoMerge
bool NoMerge
Definition: TargetLowering.h:3740
llvm::Function::arg_begin
arg_iterator arg_begin()
Definition: Function.h:794
llvm::MachineMemOperand::MOStore
@ MOStore
The memory access writes data.
Definition: MachineMemOperand.h:137
llvm::ISD::SRL_PARTS
@ SRL_PARTS
Definition: ISDOpcodes.h:717
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:403
llvm::PPCISD::ADDIS_TLSLD_HA
@ ADDIS_TLSLD_HA
G8RC = ADDIS_TLSLD_HA x2, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction th...
Definition: PPCISelLowering.h:370
llvm::ISD::UINT_TO_FP
@ UINT_TO_FP
Definition: ISDOpcodes.h:740
llvm::ISD::ADD
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
llvm::SDValue::isUndef
bool isUndef() const
Definition: SelectionDAGNodes.h:1149
llvm::TargetLoweringBase::setHasMultipleConditionRegisters
void setHasMultipleConditionRegisters(bool hasManyRegs=true)
Tells the code generator that the target has multiple (allocatable) condition registers that can be u...
Definition: TargetLowering.h:2143
llvm::PPC::MOF_WordInt
@ MOF_WordInt
Definition: PPCISelLowering.h:696
llvm::PPCISD::ADDI_TLSGD_L
@ ADDI_TLSGD_L
x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS model, produces an ADDI8 instruction t...
Definition: PPCISelLowering.h:346
llvm::ISD::STRICT_FFLOOR
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:413
llvm::checkConvertToNonDenormSingle
bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat)
Definition: PPCISelLowering.cpp:9058
llvm::makeArrayRef
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:476
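A hedged one-liner: wrapping a single element yields an ArrayRef of length 1 that borrows the caller's storage.
    #include "llvm/ADT/ArrayRef.h"
    int Imm = 42;
    llvm::ArrayRef<int> Ops = llvm::makeArrayRef(Imm);  // size() == 1, points at Imm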
llvm::APInt::abs
APInt abs() const
Get the absolute value;.
Definition: APInt.h:1863
llvm::PPCTargetLowering::isTruncateFree
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
Definition: PPCISelLowering.cpp:16197
llvm::FPOpFusion::Fast
@ Fast
Definition: TargetOptions.h:37
CodeGen.h
llvm::PPCISD::EXTSWSLI
@ EXTSWSLI
EXTSWSLI = The PPC extswsli instruction, which does an extend-sign word and shift left immediate.
Definition: PPCISelLowering.h:174
callsShareTOCBase
static bool callsShareTOCBase(const Function *Caller, SDValue Callee, const TargetMachine &TM)
Definition: PPCISelLowering.cpp:4618
getCanonicalConstSplat
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getCanonicalConstSplat - Build a canonical splat immediate of Val with an element size of SplatSize.
Definition: PPCISelLowering.cpp:8881
llvm::PPCISD::TLSGD_AIX
@ TLSGD_AIX
GPRC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY Op that combines two re...
Definition: PPCISelLowering.h:365
llvm::TLSModel::InitialExec
@ InitialExec
Definition: CodeGen.h:45
getBaseWithConstantOffset
static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base, int64_t &Offset, SelectionDAG &DAG)
Definition: PPCISelLowering.cpp:12774
llvm::CodeModel::Large
@ Large
Definition: CodeGen.h:28
llvm::EVT::getVectorElementType
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:301
llvm::MachineFunction::getPICBaseSymbol
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
Definition: MachineFunction.cpp:693
llvm::PPC::MOF_RPlusSImm16
@ MOF_RPlusSImm16
Definition: PPCISelLowering.h:685
llvm::SDNode::hasNUsesOfValue
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
Definition: SelectionDAG.cpp:9762
llvm::MemSDNode::getAlign
Align getAlign() const
Definition: SelectionDAGNodes.h:1264
llvm::Type::getVoidTy
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:186
llvm::PPCISD::MFOCRF
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
Definition: PPCISelLowering.h:209
llvm::TargetLoweringBase::setBooleanContents
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
Definition: TargetLowering.h:2102
llvm::ISD::SETOGE
@ SETOGE
Definition: ISDOpcodes.h:1360
llvm::LegalityPredicates::isVector
LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
Definition: LegalityPredicates.cpp:73
llvm::SDNode::op_begin
op_iterator op_begin() const
Definition: SelectionDAGNodes.h:903
llvm::ISD::FP_EXTEND
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:833
llvm::PPC::isVMRGLShuffleMask
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VRGL* instruction with the ...
Definition: PPCISelLowering.cpp:1912
llvm::TargetOptions::NoNaNsFPMath
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
Definition: TargetOptions.h:162
llvm::MachineFrameInfo
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Definition: MachineFrameInfo.h:107
llvm::PPCISD::VECINSERT
@ VECINSERT
VECINSERT - The PPC vector insert instruction.
Definition: PPCISelLowering.h:117
getOutputChainFromCallSeq
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart)
Definition: PPCISelLowering.cpp:5296
llvm::APFloatBase::rmNearestTiesToEven
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:190
llvm::LSBaseSDNode
Base class for LoadSDNode and StoreSDNode.
Definition: SelectionDAGNodes.h:2230
llvm::PPCISD::CALL_NOTOC
@ CALL_NOTOC
Definition: PPCISelLowering.h:188
Instructions.h
llvm::PPCSubtarget::isUsingPCRelativeCalls
bool isUsingPCRelativeCalls() const
Definition: PPCSubtarget.cpp:245
llvm::APFloat::convert
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:4825
llvm::PPCTargetLowering::emitTrailingFence
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Definition: PPCISelLowering.cpp:11009
llvm::CC_PPC32_SVR4
bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::ISD::FSUB
@ FSUB
Definition: ISDOpcodes.h:378
llvm::User::getNumOperands
unsigned getNumOperands() const
Definition: User.h:191
llvm::MVT::f128
@ f128
Definition: MachineValueType.h:58
combineBVZEXTLOAD
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG)
Definition: PPCISelLowering.cpp:13968
llvm::ISD::PREFETCH
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1101
llvm::ISD::SHL
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:657
truncateScalarIntegerArg
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl)
Definition: PPCISelLowering.cpp:6778
llvm::ISD::READCYCLECOUNTER
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1068
PrepareTailCall
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
Definition: PPCISelLowering.cpp:5048
llvm::PPCISD::SINT_VEC_TO_FP
@ SINT_VEC_TO_FP
Extract a subvector from signed integer vector and convert to FP.
Definition: PPCISelLowering.h:236
llvm::PPCSubtarget::hasAltivec
bool hasAltivec() const
Definition: PPCSubtarget.h:265
SmallVector.h
llvm::ISD::FREM
@ FREM
Definition: ISDOpcodes.h:381
llvm::PPC::DIR_PWR6
@ DIR_PWR6
Definition: PPCSubtarget.h:58
llvm::MachinePointerInfo::getFixedStack
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Definition: MachineOperand.cpp:1003
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:268
llvm::PPCISD::LD_SPLAT
@ LD_SPLAT
VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load memory instruction such as LXVDSX,...
Definition: PPCISelLowering.h:555
MachineInstrBuilder.h
isGPRShadowAligned
static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign)
Definition: PPCISelLowering.cpp:6490
addShuffleForVecExtend
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems)
Definition: PPCISelLowering.cpp:13831
llvm::PPCFunctionInfo::LongFloatingPoint
@ LongFloatingPoint
Definition: PPCMachineFunctionInfo.h:29
llvm::MCSymbolXCOFF
Definition: MCSymbolXCOFF.h:20
llvm::InlineAsm::getNumOperandRegisters
static unsigned getNumOperandRegisters(unsigned Flag)
getNumOperandRegisters - Extract the number of registers field from the inline asm operand flag.
Definition: InlineAsm.h:339
llvm::PPCISD::SHL
@ SHL
Definition: PPCISelLowering.h:167
llvm::PPCISD::VEXTS
@ VEXTS
VEXTS, ByteWidth - takes an input in VSFRC and produces an output in VSFRC that is sign-extended from...
Definition: PPCISelLowering.h:86
llvm::ISD::isUnsignedIntSetCC
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
Definition: ISDOpcodes.h:1394
llvm::ISD::ArgFlagsTy::getNonZeroByValAlign
Align getNonZeroByValAlign() const
Definition: TargetCallingConv.h:153
llvm::CCValAssign::getReg
static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
Definition: CallingConvLower.h:79
llvm::ISD::MUL
@ MUL
Definition: ISDOpcodes.h:241
llvm::ISD::UREM
@ UREM
Definition: ISDOpcodes.h:245
llvm::PPCFunctionInfo::VectorShort
@ VectorShort
Definition: PPCMachineFunctionInfo.h:31
llvm::TargetLoweringBase::Expand
@ Expand
Definition: TargetLowering.h:198
llvm::TargetLoweringBase::setTargetDAGCombine
void setTargetDAGCombine(ISD::NodeType NT)
Targets should invoke this method for each target independent node that they want to provide a custom...
Definition: TargetLowering.h:2282
llvm::ISD::ZERO_EXTEND_VECTOR_INREG
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:780
llvm::PPCSubtarget::getEnvironmentPointerRegister
MCRegister getEnvironmentPointerRegister() const
Definition: PPCSubtarget.h:396
llvm::CCValAssign::getValVT
MVT getValVT() const
Definition: CallingConvLower.h:143
llvm::PPCSubtarget::descriptorEnvironmentPointerOffset
unsigned descriptorEnvironmentPointerOffset() const
Definition: PPCSubtarget.h:390
llvm::MVT::f16
@ f16
Definition: MachineValueType.h:54
llvm::PPC::DIR_E5500
@ DIR_E5500
Definition: PPCSubtarget.h:53
llvm::Type::getPointerElementType
Type * getPointerElementType() const
Definition: Type.h:380
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:328
llvm::TargetLowering::useLoadStackGuardNode
virtual bool useLoadStackGuardNode() const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
Definition: TargetLowering.h:4619
N
#define N
llvm::TargetLoweringBase::computeRegisterProperties
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
Definition: TargetLoweringBase.cpp:1284
llvm::MCSectionXCOFF::getQualNameSymbol
MCSymbolXCOFF * getQualNameSymbol() const
Definition: MCSectionXCOFF.h:102
llvm::ISD::BITREVERSE
@ BITREVERSE
Definition: ISDOpcodes.h:670
llvm::TargetLoweringBase::setMaxAtomicSizeInBitsSupported
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Definition: TargetLowering.h:2312
CreateCopyOfByValArgument
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
Definition: PPCISelLowering.cpp:5013
llvm::ISD::SUBE
@ SUBE
Definition: ISDOpcodes.h:280
llvm::PPCTargetLowering::decomposeMulByConstant
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
Definition: PPCISelLowering.cpp:16288
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::ISD::SRL
@ SRL
Definition: ISDOpcodes.h:659
llvm::PPCTargetLowering::SelectForceXFormMode
PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
SelectForceXFormMode - Given the specified address, force it to be represented as an indexed [r+r] op...
Definition: PPCISelLowering.cpp:17334
llvm::PPCTargetLowering::EmitInstrWithCustomInserter
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Definition: PPCISelLowering.cpp:11855
isFloatingPointZero
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is 0.0 or -0.0.
Definition: PPCISelLowering.cpp:1752
llvm::PPCTargetLowering::isZExtFree
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
Definition: PPCISelLowering.cpp:16213
llvm::PPCISD::PAIR_BUILD
@ PAIR_BUILD
PAIR_BUILD = Build a vector pair register from 2 VSX registers.
Definition: PPCISelLowering.h:471
llvm::SelectionDAG::getRegisterMask
SDValue getRegisterMask(const uint32_t *RegMask)
Definition: SelectionDAG.cpp:1975
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
llvm::ISD::ArgFlagsTy::isInConsecutiveRegs
bool isInConsecutiveRegs() const
Definition: TargetCallingConv.h:124
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::PPCTargetLowering::useSoftFloat
bool useSoftFloat() const override
Definition: PPCISelLowering.cpp:1563
llvm::TargetLowering::DAGCombinerInfo::CombineTo
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
Definition: DAGCombiner.cpp:837
llvm::ISD::CTTZ
@ CTTZ
Definition: ISDOpcodes.h:667
llvm::TargetLoweringBase::getRegClassFor
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
Definition: TargetLowering.h:851
llvm::PPCISD::FRE
@ FRE
Reciprocal estimate instructions (unary FP ops).
Definition: PPCISelLowering.h:89
PPCMachineFunctionInfo.h
llvm::ISD::TargetJumpTable
@ TargetJumpTable
Definition: ISDOpcodes.h:167
llvm::ISD::STRICT_FADD
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:387
llvm::MachineFunction::getDataLayout
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Definition: MachineFunction.cpp:260
llvm::PPCISD::ST_VSR_SCAL_INT
@ ST_VSR_SCAL_INT
Store scalar integers from VSR.
Definition: PPCISelLowering.h:568
llvm::PPCTargetLowering::shouldExpandAtomicCmpXchgInIR
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Definition: PPCISelLowering.cpp:17505
llvm::PPCISD::XXPERMDI
@ XXPERMDI
XXPERMDI - The PPC XXPERMDI instruction.
Definition: PPCISelLowering.h:125
PPC
should just be implemented with a CLZ instruction Since there are other e PPC
Definition: README.txt:709
llvm::SmallSet::clear
void clear()
Definition: SmallSet.h:218
llvm::MVT::isScalarInteger
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
Definition: MachineValueType.h:360
llvm::TargetLoweringBase::AddrMode
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
Definition: TargetLowering.h:2349
llvm::PPCFunctionInfo::getFramePointerSaveIndex
int getFramePointerSaveIndex() const
Definition: PPCMachineFunctionInfo.h:156
llvm::TargetLoweringBase::isZExtFree
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
Definition: TargetLowering.h:2587
llvm::PPC::DIR_PWR8
@ DIR_PWR8
Definition: PPCSubtarget.h:61
llvm::KnownBits::isConstant
bool isConstant() const
Returns true if we know the value of all bits.
Definition: KnownBits.h:50
GeneratePerfectShuffle
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
Definition: PPCISelLowering.cpp:9336
llvm::ISD::UMIN
@ UMIN
Definition: ISDOpcodes.h:628
llvm::MipsISD::Ins
@ Ins
Definition: MipsISelLowering.h:157
widenVec
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
Definition: PPCISelLowering.cpp:8353
llvm::PPC::MOF_NoExt
@ MOF_NoExt
Definition: PPCISelLowering.h:681
MachineMemOperand.h
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:558
llvm::PPC::MOF_Vector256
@ MOF_Vector256
Definition: PPCISelLowering.h:700
llvm::ISD::MULHU
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:614
llvm::TargetOptions::UnsafeFPMath
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
Definition: TargetOptions.h:150
llvm::PPCISD::GET_TLS_ADDR
@ GET_TLS_ADDR
x3 = GET_TLS_ADDR x3, Symbol - For the general-dynamic TLS model, produces a call to __tls_get_addr(s...
Definition: PPCISelLowering.h:351
llvm::TargetLowering::CallLoweringInfo::OutVals
SmallVector< SDValue, 32 > OutVals
Definition: TargetLowering.h:3757
MachineOperand.h
RegName
#define RegName(no)
llvm::PPCISD::BCTRL
@ BCTRL
CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a BCTRL instruction.
Definition: PPCISelLowering.h:196
llvm::PPCTargetLowering::createFastISel
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
Definition: PPCISelLowering.cpp:16430
llvm::PPCFunctionInfo::ShortFloatingPoint
@ ShortFloatingPoint
Definition: PPCMachineFunctionInfo.h:28
isFPExtLoad
static bool isFPExtLoad(SDValue Op)
Definition: PPCISelLowering.cpp:13640
llvm::ISD::LROUND
@ LROUND
Definition: ISDOpcodes.h:886
llvm::Function::hasMinSize
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:714
llvm::MachineBasicBlock::transferSuccessorsAndUpdatePHIs
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
Definition: MachineBasicBlock.cpp:885
llvm::SDValue::getOpcode
unsigned getOpcode() const
Definition: SelectionDAGNodes.h:1109
llvm::PPCFunctionInfo::setLRStoreRequired
void setLRStoreRequired()
Definition: PPCMachineFunctionInfo.h:210
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1161
llvm::SelectionDAG::getTargetConstant
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:637
DerivedTypes.h
invertFMAOpcode
static unsigned invertFMAOpcode(unsigned Opc)
Definition: PPCISelLowering.cpp:16437
llvm::PPC::MOF_AddrIsSImm32
@ MOF_AddrIsSImm32
Definition: PPCISelLowering.h:692
llvm::PPCFunctionInfo::setFramePointerSaveIndex
void setFramePointerSaveIndex(int Idx)
Definition: PPCMachineFunctionInfo.h:157
llvm::StringSwitch
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:42
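A minimal sketch with made-up register names and a hypothetical helper (parseReg is not from this file): Cases are tried in order and Default supplies the fallback.
    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/StringSwitch.h"
    int parseReg(llvm::StringRef Name) {
      return llvm::StringSwitch<int>(Name)
          .Case("r0", 0)
          .Case("r1", 1)
          .Default(-1);   // returned when no case matches
    }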
isTOCSaveRestoreRequired
static bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget)
Definition: PPCISelLowering.cpp:5171
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::ISD::SETONE
@ SETONE
Definition: ISDOpcodes.h:1363
llvm::PPCSubtarget::POPCNTD_Fast
@ POPCNTD_Fast
Definition: PPCSubtarget.h:76
llvm::TargetLowering::CallLoweringInfo::Callee
SDValue Callee
Definition: TargetLowering.h:3751
llvm::EVT::bitsGT
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:257
llvm::APInt::getLowBitsSet
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Get a value with low bits set.
Definition: APInt.h:667
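Sketch of the factory: the first argument is the total bit width, the second counts how many low bits are set.
    #include "llvm/ADT/APInt.h"
    llvm::APInt Mask = llvm::APInt::getLowBitsSet(32, 8);  // 0x000000FF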
llvm::PPCFunctionInfo::setVarArgsFrameIndex
void setVarArgsFrameIndex(int Index)
Definition: PPCMachineFunctionInfo.h:220
llvm::MVT::i16
@ i16
Definition: MachineValueType.h:45
llvm::ISD::INTRINSIC_W_CHAIN
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
llvm::PPCSubtarget::useLongCalls
bool useLongCalls() const
Definition: PPCSubtarget.h:328
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1475
getRegClassForSVT
static const TargetRegisterClass * getRegClassForSVT(MVT::SimpleValueType SVT, bool IsPPC64, bool HasP8Vector, bool HasVSX)
Definition: PPCISelLowering.cpp:6749
llvm::ISD::FNEG
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:866
llvm::SelectionDAG::getMachineFunction
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:437
llvm::SelectionDAG::ComputeNumSignBits
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
Definition: SelectionDAG.cpp:3658
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::PPCISD::CMPB
@ CMPB
The CMPB instruction (takes two operands of i32 or i64).
Definition: PPCISelLowering.h:128
llvm::LoadInst::isUnordered
bool isUnordered() const
Definition: Instructions.h:261
llvm::PPCISD::GET_TLSLD_ADDR
@ GET_TLSLD_ADDR
x3 = GET_TLSLD_ADDR x3, Symbol - For the local-dynamic TLS model, produces a call to __tls_get_addr(s...
Definition: PPCISelLowering.h:381
llvm::PPCISD::ATOMIC_CMP_SWAP_16
@ ATOMIC_CMP_SWAP_16
Definition: PPCISelLowering.h:574
llvm::PPCSubtarget::getCPUDirective
unsigned getCPUDirective() const
getCPUDirective - Returns the -m directive specified for the cpu.
Definition: PPCSubtarget.h:195
llvm::PPCSubtarget::hasQuadwordAtomics
bool hasQuadwordAtomics() const
Definition: PPCSubtarget.h:306
llvm::PPCSubtarget::allowsUnalignedFPAccess
bool allowsUnalignedFPAccess() const
Definition: PPCSubtarget.h:297
llvm::ISD::BUILD_PAIR
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:229
llvm::ISD::VAARG
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1032
llvm::ISD::SDIVREM
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:255
llvm::SelectionDAG::getExternalSymbol
SDValue getExternalSymbol(const char *Sym, EVT VT)
Definition: SelectionDAG.cpp:1711
llvm::PPCISD::FP_TO_UINT_IN_VSR
@ FP_TO_UINT_IN_VSR
Floating-point-to-integer conversion instructions.
Definition: PPCISelLowering.h:81
llvm::TargetLoweringBase::MaxLoadsPerMemcmp
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
Definition: TargetLowering.h:3127
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::MachineFrameInfo::setFrameAddressIsTaken
void setFrameAddressIsTaken(bool T)
Definition: MachineFrameInfo.h:367
llvm::Sched::Preference
Preference
Definition: TargetLowering.h:97
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
llvm::ISD::isNormalStore
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
Definition: SelectionDAGNodes.h:2716
llvm::PPCII::MO_TPREL_FLAG
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set the symbol reference is relative to TLS Initial Exec model.
Definition: PPC.h:127
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::NVPTX::VecShuffle
@ VecShuffle
Definition: NVPTX.h:88
llvm::cl::desc
Definition: CommandLine.h:414
Mod
Module * Mod
Definition: PassBuilderBindings.cpp:54
llvm::MVT::fp_valuetypes
static auto fp_valuetypes()
Definition: MachineValueType.h:1416
llvm::TargetLoweringBase::getValueType
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
Definition: TargetLowering.h:1408
llvm::M1
unsigned M1(unsigned Val)
Definition: VE.h:372
llvm::PPC::get_VSPLTI_elt
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
Definition: PPCISelLowering.cpp:2412
llvm::PPCTargetLowering::getRegForInlineAsmConstraint
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Definition: PPCISelLowering.cpp:15694
needStackSlotPassParameters
static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl< ISD::OutputArg > &Outs)
Definition: PPCISelLowering.cpp:4698
llvm::PPC::MOF_SubtargetBeforeP9
@ MOF_SubtargetBeforeP9
Definition: PPCISelLowering.h:703
llvm::PPCSubtarget::hasLFIWAX
bool hasLFIWAX() const
Definition: PPCSubtarget.h:262
llvm::ISD::SIGN_EXTEND
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:726
raw_ostream.h
llvm::PPCII::MO_TPREL_LO
@ MO_TPREL_LO
Definition: PPC.h:159
llvm::PPCISD::STRICT_FCFID
@ STRICT_FCFID
Constrained integer-to-floating-point conversion instructions.
Definition: PPCISelLowering.h:489
llvm::SelectionDAG::getMemcpy
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes())
Definition: SelectionDAG.cpp:6801
llvm::AMDGPU::VGPRIndexMode::Id
Id
Definition: SIDefines.h:221
llvm::MVT::v8i8
@ v8i8
Definition: MachineValueType.h:79
llvm::PPCISD::FTSQRT
@ FTSQRT
Test instruction for software square root.
Definition: PPCISelLowering.h:93
llvm::TargetLoweringBase::shouldExpandBuildVectorWithShuffles
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
Definition: TargetLowering.h:464
llvm::PPCTargetLowering::getTargetNodeName
const char * getTargetNodeName(unsigned Opcode) const override
getTargetNodeName() - This method returns the name of a target specific DAG node.
Definition: PPCISelLowering.cpp:1575
isVMerge
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
Definition: PPCISelLowering.cpp:1888
llvm::SDNodeFlags::hasNoSignedZeros
bool hasNoSignedZeros() const
Definition: SelectionDAGNodes.h:429
llvm::StringRef::size
LLVM_NODISCARD size_t size() const
size - Get the string size.
Definition: StringRef.h:157
MachineFunction.h
llvm::ISD::FTRUNC
@ FTRUNC
Definition: ISDOpcodes.h:880
setAlignFlagsForFI
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Set alignment flags based on whether or not the Frame Index is aligned.
Definition: PPCISelLowering.cpp:17157
buildCallOperands
static void buildCallOperands(SmallVectorImpl< SDValue > &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector< std::pair< unsigned, SDValue >, 8 > &RegsToPass, SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, const PPCSubtarget &Subtarget)
Definition: PPCISelLowering.cpp:5416
convertFPToInt
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
Definition: PPCISelLowering.cpp:7985
PPCPredicates.h
llvm::PPCISD::EH_SJLJ_SETJMP
@ EH_SJLJ_SETJMP
Definition: PPCISelLowering.h:262
llvm::MachineInstrBundleIterator< MachineInstr >
llvm::isPowerOf2_64
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:496
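Illustration: zero is not treated as a power of two, and the check is constexpr.
    #include "llvm/Support/MathExtras.h"
    static_assert(llvm::isPowerOf2_64(64), "exact power of two");
    static_assert(!llvm::isPowerOf2_64(0) && !llvm::isPowerOf2_64(96), "not powers of two");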
llvm::MVT::v2i16
@ v2i16
Definition: MachineValueType.h:89
llvm::SDNode::getFlags
SDNodeFlags getFlags() const
Definition: SelectionDAGNodes.h:947
llvm::LLT::scalar
static LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelTypeImpl.h:43
llvm::PPCTargetLowering::isLegalICmpImmediate
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
Definition: PPCISelLowering.cpp:16242
Value.h
llvm::SelectionDAG::getStackArgumentTokenFactor
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
Definition: SelectionDAG.cpp:6208
llvm::ISD::INSERT_VECTOR_ELT
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:500
llvm::MachineMemOperand::MONone
@ MONone
Definition: MachineMemOperand.h:133
MCExpr.h
llvm::TargetLowering::getSqrtInputTest
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
Definition: TargetLowering.cpp:5987
llvm::PPCTargetLowering::isFMAFasterThanFMulAndFAdd
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
Definition: PPCISelLowering.cpp:16315
llvm::ISD::STACKSAVE
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1008
llvm::TargetLoweringBase::MaxStoresPerMemmove
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
Definition: TargetLowering.h:3141
llvm::ISD::FLOG
@ FLOG
Definition: ISDOpcodes.h:874
llvm::PPCTargetLowering::PerformDAGCombine
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
Definition: PPCISelLowering.cpp:14727
llvm::SelectionDAG::getExtLoad
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Definition: SelectionDAG.cpp:7443
llvm::ISD::ADDE
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:279
llvm::ISD::FP_ROUND
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:814
llvm::MVT::f32
@ f32
Definition: MachineValueType.h:55
llvm::codegen::getCodeModel
CodeModel::Model getCodeModel()
llvm::MachinePointerInfo::getStack
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
Definition: MachineOperand.cpp:1016
llvm::TargetLoweringBase::setPrefLoopAlignment
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
Definition: TargetLowering.h:2301
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::ISD::ROTR
@ ROTR
Definition: ISDOpcodes.h:661
llvm::MachineMemOperand::getFlags
Flags getFlags() const
Return the raw flags of the source value,.
Definition: MachineMemOperand.h:220
llvm::AtomicCmpXchgInst
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:521
llvm::SelectionDAG::getTarget
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:441
TargetRegisterInfo.h
llvm::PPCFunctionInfo::getReturnAddrSaveIndex
int getReturnAddrSaveIndex() const
Definition: PPCMachineFunctionInfo.h:159
llvm::PPCISD::TLS_LOCAL_EXEC_MAT_ADDR
@ TLS_LOCAL_EXEC_MAT_ADDR
TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address when using local exec access ...
Definition: PPCISelLowering.h:465
llvm::MCExpr
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
Debug.h
llvm::PPCFunctionInfo::setMinReservedArea
void setMinReservedArea(unsigned size)
Definition: PPCMachineFunctionInfo.h:176
llvm::EVT::isFloatingPoint
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:140
llvm::SystemZISD::TBEGIN
@ TBEGIN
Definition: SystemZISelLowering.h:160
llvm::ISD::ATOMIC_LOAD
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to the "load atomic" instruction.
Definition: ISDOpcodes.h:1114
llvm::TargetRegisterInfo::getCallPreservedMask
virtual const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const
Return a mask of call-preserved registers for the given calling convention on the current function.
Definition: TargetRegisterInfo.h:485
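Call-lowering code typically attaches this mask to the call node's operand list; a sketch assuming in-scope TRI, MF, CallConv, DAG, and an Ops vector being assembled:
const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
assert(Mask && "missing call-preserved mask for calling convention");
// Record which registers survive the call.
Ops.push_back(DAG.getRegisterMask(Mask));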
llvm::PPCISD::ADDI_TLSGD_L_ADDR
@ ADDI_TLSGD_L_ADDR
G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSGD_L and GET_TLS_ADDR unti...
Definition: PPCISelLowering.h:356
isConsecutiveLS
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
Definition: PPCISelLowering.cpp:12826
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:270
isValidPCRelNode
static bool isValidPCRelNode(SDValue N)
Definition: PPCISelLowering.cpp:2862
PPCTargetMachine.h
llvm::MemSDNode::getPointerInfo
const MachinePointerInfo & getPointerInfo() const
Definition: SelectionDAGNodes.h:1334
llvm::CCValAssign::needsCustom
bool needsCustom() const
Definition: CallingConvLower.h:148
llvm::PPCISD::STRICT_FCTIWZ
@ STRICT_FCTIWZ
Definition: PPCISelLowering.h:484
llvm::ISD::SCALAR_TO_VECTOR
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:583
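A sketch of forming the node, assuming an in-scope DAG, dl, and a scalar f32 SDValue named FloatVal:
// Element 0 becomes FloatVal; the remaining lanes are undefined.
SDValue Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, FloatVal);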
llvm::PPCSubtarget::hasFRSQRTE
bool hasFRSQRTE() const
Definition: PPCSubtarget.h:258
llvm::SrcOp
Definition: MachineIRBuilder.h:119
llvm::PPCSubtarget::pairedVectorMemops
bool pairedVectorMemops() const
Definition: PPCSubtarget.h:282
llvm::PPCISD::COND_BRANCH
@ COND_BRANCH
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
Definition: PPCISelLowering.h:284
llvm::PPCSubtarget::use64BitRegs
bool use64BitRegs() const
use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit registers in 32-bit mode when...
Definition: PPCSubtarget.h:244
llvm::TargetLowering::getConstraintType
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
Definition: TargetLowering.cpp:4448
llvm::TargetLoweringBase::getPointerTy
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
Definition: TargetLowering.h:345
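Lowering code usually queries this through the DAG's DataLayout; a sketch assuming in-scope TLI, DAG, and dl:
EVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());  // e.g. i64 on a 64-bit target
SDValue NullPtr = DAG.getConstant(0, dl, PtrVT);    // pointer-width zero constant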
llvm::ISD::CTPOP
@ CTPOP
Definition: ISDOpcodes.h:669
llvm::PPCTargetLowering::emitProbedAlloca
MachineBasicBlock * emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const
Definition: PPCISelLowering.cpp:11693
llvm::Module::getNamedValue
GlobalValue * getNamedValue(StringRef Name) const
Return the global value in the module with the specified name, of arbitrary type.
Definition: Module.cpp:113
llvm::PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR
@ TLS_DYNAMIC_MAT_PCREL_ADDR
TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for TLS global address when using dyna...
Definition: PPCISelLowering.h:460
llvm::SelectionDAG::getSetCC
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
Definition: SelectionDAG.h:1058
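A sketch of the helper in use, assuming in-scope DAG, dl, and two integer SDValues LHS and RHS; the i1 result type is illustrative (targets often query getSetCCResultType instead):
SDValue IsLess = DAG.getSetCC(dl, MVT::i1, LHS, RHS, ISD::SETLT);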
llvm::ISD::SADDSAT
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:327
llvm::TargetLoweringBase::LibCall
@ LibCall
Definition: TargetLowering.h:199
llvm::ISD::TokenFactor
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
llvm::SelectionDAG::getMergeValues
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
Definition: SelectionDAG.cpp:7192
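Commonly used to return a value/chain pair from a custom lowering routine; a sketch assuming in-scope DAG, dl, Result, and Chain:
// Bundle the computed value and its chain into one multi-result SDValue.
return DAG.getMergeValues({Result, Chain}, dl);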
llvm::PPCTargetLowering::getByValTypeAlignment
unsigned getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
Definition: PPCISelLowering.cpp:1553
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
llvm::PPCISD::FRSQRTE
@ FRSQRTE
Definition: PPCISelLowering.h:90
llvm::ISD::EXTRACT_ELEMENT
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:222
llvm::Type::getPrimitiveSizeInBits
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:128
llvm::MCRegister
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:23
getNormalLoadInput
static const SDValue * getNormalLoadInput(const SDValue &Op, bool &IsPermuted)
Definition: PPCISelLowering.cpp:9014
llvm::EVT::getSimpleVT
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:289
llvm::PPCISD::VECSHL
@ VECSHL
VECSHL - The PPC vector shift left instruction.
Definition: PPCISelLowering.h:121
llvm::ISD::isBuildVectorAllZeros
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
Definition: SelectionDAG.cpp:266
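A sketch of a typical guard in a DAG combine, assuming an SDValue V in scope:
if (ISD::isBuildVectorAllZeros(V.getNode()))
  return V;  // operand is already a zero vector; nothing to rewrite (illustrative)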
SmallSet.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
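The returned pair makes visited-set checks a one-liner; a sketch of a worklist traversal over SDNodes (Root is an assumed starting node):
SmallPtrSet<const SDNode *, 16> Visited;
SmallVector<const SDNode *, 8> Worklist = {Root};
while (!Worklist.empty()) {
  const SDNode *N = Worklist.pop_back_val();
  if (!Visited.insert(N).second)
    continue;  // insert() reported N was already present; skip re-processing
  for (const SDValue &Op : N->op_values())
    Worklist.push_back(Op.getNode());
}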
llvm::PPCISD::FCFIDS
@ FCFIDS
Definition: PPCISelLowering.h:66
llvm::MVT::getIntegerVT
static MVT getIntegerVT(unsigned BitWidth)
Definition: MachineValueType.h:1158
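A sketch; the MVT form expects a bit width that maps to a simple type, while EVT::getIntegerVT handles arbitrary widths:
MVT I64 = MVT::getIntegerVT(64);  // yields MVT::i64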
llvm::TLSModel::LocalExec
@ LocalExec
Definition: CodeGen.h:46
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
llvm::PPCSubtarget::isELFv2ABI
bool isELFv2ABI() const
Definition: PPCSubtarget.cpp:242
llvm::ISD::VSELECT
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:688
llvm::SelectionDAG::getCondCode
SDValue getCondCode(ISD::CondCode Cond)
Definition: SelectionDAG.cpp:1738
TargetLoweringObjectFileImpl.h
llvm::ISD::FDIV
@ FDIV
Definition: ISDOpcodes.h:380
llvm::LLT
Definition: LowLevelTypeImpl.h:40